Compare commits

..

8 Commits

Author SHA1 Message Date
dmahan93
be43bee11a final changes from successful run 2026-04-22 14:57:57 -05:00
dmahan93
721e0b96cd add length eviction if no compression 2026-04-16 01:10:11 -05:00
dmahan93
d988343570 fixup some compression stuff 2026-04-14 00:22:52 -05:00
dmahan93
43dee2e1cf update for rl overrides 2026-04-14 00:16:04 -05:00
dmahan93
637a214820 fix: token ID extraction bugs in run_agent.py
- hasattr() returns bool, not None — changed 'is not None' to proper check
- Fixed variable name typo: assistant_msg -> assistant_message
- Trajectory format: use 'in' dict check instead of hasattr on dicts
2026-04-04 14:59:18 -05:00
dmahan93
f168a4f1bf add prompt_tokens/ generation logprobs to run_agent 2026-04-04 13:35:42 -05:00
dmahan93
6442255f83 clean up agent_loop.py: remove debug print and dead comments 2026-04-03 18:11:26 -05:00
dmahan93
44371a9bbb add nemo gym support 2026-04-03 18:02:08 -05:00
213 changed files with 4766 additions and 21426 deletions

View File

@@ -6,8 +6,6 @@ on:
paths:
- 'website/**'
- 'landingpage/**'
- 'skills/**'
- 'optional-skills/**'
- '.github/workflows/deploy-site.yml'
workflow_dispatch:
@@ -36,16 +34,6 @@ jobs:
cache: npm
cache-dependency-path: website/package-lock.json
- uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Install PyYAML for skill extraction
run: pip install pyyaml
- name: Extract skill metadata for dashboard
run: python3 website/scripts/extract-skills.py
- name: Install dependencies
run: npm ci
working-directory: website

View File

@@ -27,11 +27,8 @@ jobs:
with:
python-version: '3.11'
- name: Install Python dependencies
run: python -m pip install ascii-guard pyyaml
- name: Extract skill metadata for dashboard
run: python3 website/scripts/extract-skills.py
- name: Install ascii-guard
run: python -m pip install ascii-guard
- name: Lint docs diagrams
run: npm run lint:diagrams

View File

@@ -10,7 +10,6 @@ Auth supports:
- Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json) → Bearer auth
"""
import copy
import json
import logging
import os
@@ -950,69 +949,6 @@ def _convert_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:
return block
def _to_plain_data(value: Any, *, _depth: int = 0, _path: Optional[set] = None) -> Any:
"""Recursively convert SDK objects to plain Python data structures.
Guards against circular references (``_path`` tracks ``id()`` of objects
on the *current* recursion path) and runaway depth (capped at 20 levels).
Uses path-based tracking so shared (but non-cyclic) objects referenced by
multiple siblings are converted correctly rather than being stringified.
"""
_MAX_DEPTH = 20
if _depth > _MAX_DEPTH:
return str(value)
if _path is None:
_path = set()
obj_id = id(value)
if obj_id in _path:
return str(value)
if hasattr(value, "model_dump"):
_path.add(obj_id)
result = _to_plain_data(value.model_dump(), _depth=_depth + 1, _path=_path)
_path.discard(obj_id)
return result
if isinstance(value, dict):
_path.add(obj_id)
result = {k: _to_plain_data(v, _depth=_depth + 1, _path=_path) for k, v in value.items()}
_path.discard(obj_id)
return result
if isinstance(value, (list, tuple)):
_path.add(obj_id)
result = [_to_plain_data(v, _depth=_depth + 1, _path=_path) for v in value]
_path.discard(obj_id)
return result
if hasattr(value, "__dict__"):
_path.add(obj_id)
result = {
k: _to_plain_data(v, _depth=_depth + 1, _path=_path)
for k, v in vars(value).items()
if not k.startswith("_")
}
_path.discard(obj_id)
return result
return value
def _extract_preserved_thinking_blocks(message: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Return Anthropic thinking blocks previously preserved on the message."""
raw_details = message.get("reasoning_details")
if not isinstance(raw_details, list):
return []
preserved: List[Dict[str, Any]] = []
for detail in raw_details:
if not isinstance(detail, dict):
continue
block_type = str(detail.get("type", "") or "").strip().lower()
if block_type not in {"thinking", "redacted_thinking"}:
continue
preserved.append(copy.deepcopy(detail))
return preserved
def _convert_content_to_anthropic(content: Any) -> Any:
"""Convert OpenAI-style multimodal content arrays to Anthropic blocks."""
if not isinstance(content, list):
@@ -1059,7 +995,7 @@ def convert_messages_to_anthropic(
continue
if role == "assistant":
blocks = _extract_preserved_thinking_blocks(m)
blocks = []
if content:
if isinstance(content, list):
converted_content = _convert_content_to_anthropic(content)
@@ -1343,7 +1279,6 @@ def normalize_anthropic_response(
"""
text_parts = []
reasoning_parts = []
reasoning_details = []
tool_calls = []
for block in response.content:
@@ -1351,9 +1286,6 @@ def normalize_anthropic_response(
text_parts.append(block.text)
elif block.type == "thinking":
reasoning_parts.append(block.thinking)
block_dict = _to_plain_data(block)
if isinstance(block_dict, dict):
reasoning_details.append(block_dict)
elif block.type == "tool_use":
name = block.name
if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
@@ -1384,7 +1316,7 @@ def normalize_anthropic_response(
tool_calls=tool_calls or None,
reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
reasoning_content=None,
reasoning_details=reasoning_details or None,
reasoning_details=None,
),
finish_reason,
)

View File

@@ -1078,9 +1078,9 @@ def resolve_provider_client(
tried_sources = list(pconfig.api_key_env_vars)
if provider == "copilot":
tried_sources.append("gh auth token")
logger.debug("resolve_provider_client: provider %s has no API "
"key configured (tried: %s)",
provider, ", ".join(tried_sources))
logger.warning("resolve_provider_client: provider %s has no API "
"key configured (tried: %s)",
provider, ", ".join(tried_sources))
return None, None
base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url

View File

@@ -1,113 +0,0 @@
"""BuiltinMemoryProvider — wraps MEMORY.md / USER.md as a MemoryProvider.
Always registered as the first provider. Cannot be disabled or removed.
This is the existing Hermes memory system exposed through the provider
interface for compatibility with the MemoryManager.
The actual storage logic lives in tools/memory_tool.py (MemoryStore).
This provider is a thin adapter that delegates to MemoryStore and
exposes the memory tool schema.
"""
from __future__ import annotations
import json
import logging
from typing import Any, Dict, List, Optional
from agent.memory_provider import MemoryProvider
logger = logging.getLogger(__name__)
class BuiltinMemoryProvider(MemoryProvider):
    """Built-in file-backed memory (MEMORY.md + USER.md).

    Always active and never displaced by external providers.  The `memory`
    tool itself is intercepted at the agent level in run_agent.py before
    normal tool dispatch, so this provider advertises no tool schemas —
    its only job is contributing the frozen MEMORY.md / USER.md snapshot
    to the system prompt.  Storage logic lives in tools/memory_tool.py
    (MemoryStore); this class is a thin adapter over it.
    """

    def __init__(
        self,
        memory_store=None,
        memory_enabled: bool = False,
        user_profile_enabled: bool = False,
    ):
        self._store = memory_store
        self._memory_enabled = memory_enabled
        self._user_profile_enabled = user_profile_enabled

    @property
    def name(self) -> str:
        return "builtin"

    def is_available(self) -> bool:
        """The built-in store needs no credentials or deps — always ready."""
        return True

    def initialize(self, session_id: str, **kwargs) -> None:
        """Load memory entries from disk if a store was supplied."""
        store = self._store
        if store is not None:
            store.load_from_disk()

    def system_prompt_block(self) -> str:
        """Render MEMORY.md / USER.md content for the system prompt.

        Uses the snapshot frozen at load time so the system prompt (and the
        prompt cache) stays stable for the whole session, even while live
        entries change through tool calls.
        """
        if not self._store:
            return ""
        sections = []
        for enabled, kind in (
            (self._memory_enabled, "memory"),
            (self._user_profile_enabled, "user"),
        ):
            if not enabled:
                continue
            rendered = self._store.format_for_system_prompt(kind)
            if rendered:
                sections.append(rendered)
        return "\n\n".join(sections)

    def prefetch(self, query: str, *, session_id: str = "") -> str:
        """No query-based recall — content arrives via system_prompt_block()."""
        return ""

    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
        """No-op: writes happen only through the intercepted memory tool."""

    def get_tool_schemas(self) -> List[Dict[str, Any]]:
        """Empty list — the `memory` tool is wired separately in run_agent.py."""
        return []

    def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> str:
        """Never routed here; the agent loop intercepts the memory tool."""
        return json.dumps({"error": "Built-in memory tool is handled by the agent loop"})

    def shutdown(self) -> None:
        """Nothing to flush — every write is persisted immediately."""

    # -- Backward-compatible attribute access --------------------------------

    @property
    def store(self):
        """Underlying MemoryStore, for legacy code paths."""
        return self._store

    @property
    def memory_enabled(self) -> bool:
        return self._memory_enabled

    @property
    def user_profile_enabled(self) -> bool:
        return self._user_profile_enabled

View File

@@ -301,8 +301,6 @@ Update the summary using this exact structure. PRESERVE all existing information
Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions.
Write the summary in the same language the user was using in the conversation.
Write only the summary body. Do not include any preamble or prefix."""
else:
# First compaction: summarize from scratch
@@ -341,8 +339,6 @@ Use this exact structure:
Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions. The goal is to prevent the next assistant from repeating work or losing important details.
Write the summary in the same language the user was using in the conversation.
Write only the summary body. Do not include any preamble or prefix."""
try:

View File

@@ -1,335 +0,0 @@
"""MemoryManager — orchestrates the built-in memory provider plus at most
ONE external plugin memory provider.
Single integration point in run_agent.py. Replaces scattered per-backend
code with one manager that delegates to registered providers.
The BuiltinMemoryProvider is always registered first and cannot be removed.
Only ONE external (non-builtin) provider is allowed at a time — attempting
to register a second external provider is rejected with a warning. This
prevents tool schema bloat and conflicting memory backends.
Usage in run_agent.py:
self._memory_manager = MemoryManager()
self._memory_manager.add_provider(BuiltinMemoryProvider(...))
# Only ONE of these:
self._memory_manager.add_provider(plugin_provider)
# System prompt
prompt_parts.append(self._memory_manager.build_system_prompt())
# Pre-turn
context = self._memory_manager.prefetch_all(user_message)
# Post-turn
self._memory_manager.sync_all(user_msg, assistant_response)
self._memory_manager.queue_prefetch_all(user_msg)
"""
from __future__ import annotations
import json
import logging
from typing import Any, Dict, List, Optional
from agent.memory_provider import MemoryProvider
logger = logging.getLogger(__name__)
class MemoryManager:
    """Orchestrates the built-in provider plus at most one external provider.

    The builtin provider is always first. Only one non-builtin (external)
    provider is allowed. Failures in one provider never block the other.
    """

    def __init__(self) -> None:
        # Providers in registration order; builtin is expected to be first.
        self._providers: List[MemoryProvider] = []
        # Routing index: tool name -> owning provider.
        self._tool_to_provider: Dict[str, MemoryProvider] = {}
        self._has_external: bool = False  # True once a non-builtin provider is added

    # -- Registration --------------------------------------------------------

    def add_provider(self, provider: MemoryProvider) -> None:
        """Register a memory provider.

        Built-in provider (name ``"builtin"``) is always accepted.
        Only **one** external (non-builtin) provider is allowed — a second
        attempt is rejected with a warning.
        """
        is_builtin = provider.name == "builtin"
        if not is_builtin:
            if self._has_external:
                # Find the already-registered external provider for the log message.
                existing = next(
                    (p.name for p in self._providers if p.name != "builtin"), "unknown"
                )
                logger.warning(
                    "Rejected memory provider '%s' — external provider '%s' is "
                    "already registered. Only one external memory provider is "
                    "allowed at a time. Configure which one via memory.provider "
                    "in config.yaml.",
                    provider.name, existing,
                )
                return
            self._has_external = True
        self._providers.append(provider)
        # Index tool names → provider for routing
        for schema in provider.get_tool_schemas():
            tool_name = schema.get("name", "")
            if tool_name and tool_name not in self._tool_to_provider:
                self._tool_to_provider[tool_name] = provider
            elif tool_name in self._tool_to_provider:
                # First registration wins; later providers with the same tool
                # name are ignored (with a warning) rather than overriding.
                logger.warning(
                    "Memory tool name conflict: '%s' already registered by %s, "
                    "ignoring from %s",
                    tool_name,
                    self._tool_to_provider[tool_name].name,
                    provider.name,
                )
        logger.info(
            "Memory provider '%s' registered (%d tools)",
            provider.name,
            len(provider.get_tool_schemas()),
        )

    @property
    def providers(self) -> List[MemoryProvider]:
        """All registered providers in order (defensive copy)."""
        return list(self._providers)

    @property
    def provider_names(self) -> List[str]:
        """Names of all registered providers."""
        return [p.name for p in self._providers]

    def get_provider(self, name: str) -> Optional[MemoryProvider]:
        """Get a provider by name, or None if not registered."""
        for p in self._providers:
            if p.name == name:
                return p
        return None

    # -- System prompt -------------------------------------------------------

    def build_system_prompt(self) -> str:
        """Collect system prompt blocks from all providers.

        Returns combined text, or empty string if no providers contribute.
        A failing provider is logged and skipped; the rest still contribute.
        """
        blocks = []
        for provider in self._providers:
            try:
                block = provider.system_prompt_block()
                if block and block.strip():
                    blocks.append(block)
            except Exception as e:
                logger.warning(
                    "Memory provider '%s' system_prompt_block() failed: %s",
                    provider.name, e,
                )
        return "\n\n".join(blocks)

    # -- Prefetch / recall ---------------------------------------------------

    def prefetch_all(self, query: str, *, session_id: str = "") -> str:
        """Collect prefetch context from all providers.

        Returns merged context text labeled by provider. Empty providers
        are skipped. Failures in one provider don't block others.
        """
        parts = []
        for provider in self._providers:
            try:
                result = provider.prefetch(query, session_id=session_id)
                if result and result.strip():
                    parts.append(result)
            except Exception as e:
                logger.debug(
                    "Memory provider '%s' prefetch failed (non-fatal): %s",
                    provider.name, e,
                )
        return "\n\n".join(parts)

    def queue_prefetch_all(self, query: str, *, session_id: str = "") -> None:
        """Queue background prefetch on all providers for the next turn."""
        for provider in self._providers:
            try:
                provider.queue_prefetch(query, session_id=session_id)
            except Exception as e:
                logger.debug(
                    "Memory provider '%s' queue_prefetch failed (non-fatal): %s",
                    provider.name, e,
                )

    # -- Sync ----------------------------------------------------------------

    def sync_all(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
        """Sync a completed turn to all providers (failures logged, not raised)."""
        for provider in self._providers:
            try:
                provider.sync_turn(user_content, assistant_content, session_id=session_id)
            except Exception as e:
                logger.warning(
                    "Memory provider '%s' sync_turn failed: %s",
                    provider.name, e,
                )

    # -- Tools ---------------------------------------------------------------

    def get_all_tool_schemas(self) -> List[Dict[str, Any]]:
        """Collect tool schemas from all providers (first name wins on conflict)."""
        schemas = []
        seen = set()
        for provider in self._providers:
            try:
                for schema in provider.get_tool_schemas():
                    name = schema.get("name", "")
                    if name and name not in seen:
                        schemas.append(schema)
                        seen.add(name)
            except Exception as e:
                logger.warning(
                    "Memory provider '%s' get_tool_schemas() failed: %s",
                    provider.name, e,
                )
        return schemas

    def get_all_tool_names(self) -> set:
        """Return set of all tool names across all providers."""
        return set(self._tool_to_provider.keys())

    def has_tool(self, tool_name: str) -> bool:
        """Check if any provider handles this tool."""
        return tool_name in self._tool_to_provider

    def handle_tool_call(
        self, tool_name: str, args: Dict[str, Any], **kwargs
    ) -> str:
        """Route a tool call to the correct provider.

        Returns JSON string result. Unknown tools and provider failures
        are reported as JSON ``{"error": ...}`` payloads rather than raised.
        """
        provider = self._tool_to_provider.get(tool_name)
        if provider is None:
            return json.dumps({"error": f"No memory provider handles tool '{tool_name}'"})
        try:
            return provider.handle_tool_call(tool_name, args, **kwargs)
        except Exception as e:
            logger.error(
                "Memory provider '%s' handle_tool_call(%s) failed: %s",
                provider.name, tool_name, e,
            )
            return json.dumps({"error": f"Memory tool '{tool_name}' failed: {e}"})

    # -- Lifecycle hooks -----------------------------------------------------

    def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None:
        """Notify all providers of a new turn.

        kwargs may include: remaining_tokens, model, platform, tool_count.
        """
        for provider in self._providers:
            try:
                provider.on_turn_start(turn_number, message, **kwargs)
            except Exception as e:
                logger.debug(
                    "Memory provider '%s' on_turn_start failed: %s",
                    provider.name, e,
                )

    def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
        """Notify all providers of session end."""
        for provider in self._providers:
            try:
                provider.on_session_end(messages)
            except Exception as e:
                logger.debug(
                    "Memory provider '%s' on_session_end failed: %s",
                    provider.name, e,
                )

    def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
        """Notify all providers before context compression.

        Returns combined text from providers to include in the compression
        summary prompt. Empty string if no provider contributes.
        """
        parts = []
        for provider in self._providers:
            try:
                result = provider.on_pre_compress(messages)
                if result and result.strip():
                    parts.append(result)
            except Exception as e:
                logger.debug(
                    "Memory provider '%s' on_pre_compress failed: %s",
                    provider.name, e,
                )
        return "\n\n".join(parts)

    def on_memory_write(self, action: str, target: str, content: str) -> None:
        """Notify external providers when the built-in memory tool writes.

        Skips the builtin provider itself (it's the source of the write).
        """
        for provider in self._providers:
            if provider.name == "builtin":
                continue
            try:
                provider.on_memory_write(action, target, content)
            except Exception as e:
                logger.debug(
                    "Memory provider '%s' on_memory_write failed: %s",
                    provider.name, e,
                )

    def on_delegation(self, task: str, result: str, *,
                      child_session_id: str = "", **kwargs) -> None:
        """Notify all providers that a subagent completed."""
        for provider in self._providers:
            try:
                provider.on_delegation(
                    task, result, child_session_id=child_session_id, **kwargs
                )
            except Exception as e:
                logger.debug(
                    "Memory provider '%s' on_delegation failed: %s",
                    provider.name, e,
                )

    def shutdown_all(self) -> None:
        """Shut down all providers (reverse order for clean teardown)."""
        for provider in reversed(self._providers):
            try:
                provider.shutdown()
            except Exception as e:
                logger.warning(
                    "Memory provider '%s' shutdown failed: %s",
                    provider.name, e,
                )

    def initialize_all(self, session_id: str, **kwargs) -> None:
        """Initialize all providers.

        Automatically injects ``hermes_home`` into *kwargs* so that every
        provider can resolve profile-scoped storage paths without importing
        ``get_hermes_home()`` themselves.
        """
        if "hermes_home" not in kwargs:
            # Imported lazily to avoid a module-level dependency cycle.
            from hermes_constants import get_hermes_home
            kwargs["hermes_home"] = str(get_hermes_home())
        for provider in self._providers:
            try:
                provider.initialize(session_id=session_id, **kwargs)
            except Exception as e:
                logger.warning(
                    "Memory provider '%s' initialize failed: %s",
                    provider.name, e,
                )

View File

@@ -1,231 +0,0 @@
"""Abstract base class for pluggable memory providers.
Memory providers give the agent persistent recall across sessions. One
external provider is active at a time alongside the always-on built-in
memory (MEMORY.md / USER.md). The MemoryManager enforces this limit.
Built-in memory is always active as the first provider and cannot be removed.
External providers (Honcho, Hindsight, Mem0, etc.) are additive — they never
disable the built-in store. Only one external provider runs at a time to
prevent tool schema bloat and conflicting memory backends.
Registration:
1. Built-in: BuiltinMemoryProvider — always present, not removable.
2. Plugins: Ship in plugins/memory/<name>/, activated by memory.provider config.
Lifecycle (called by MemoryManager, wired in run_agent.py):
initialize() — connect, create resources, warm up
system_prompt_block() — static text for the system prompt
prefetch(query) — background recall before each turn
sync_turn(user, asst) — async write after each turn
get_tool_schemas() — tool schemas to expose to the model
handle_tool_call() — dispatch a tool call
shutdown() — clean exit
Optional hooks (override to opt in):
on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
on_session_end(messages) — end-of-session extraction
on_pre_compress(messages) -> str — extract before context compression
on_memory_write(action, target, content) — mirror built-in memory writes
on_delegation(task, result, **kwargs) — parent-side observation of subagent work
"""
from __future__ import annotations
import logging
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
class MemoryProvider(ABC):
    """Abstract base class for pluggable memory backends.

    One external provider (Honcho, Hindsight, Mem0, ...) may run alongside
    the always-on built-in MEMORY.md / USER.md store; the MemoryManager
    enforces that limit, and plugins never displace the builtin provider.

    Required overrides: the ``name`` property, is_available(), initialize()
    and get_tool_schemas().  Every other method carries a safe no-op default,
    so providers opt in only to the hooks they need.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Short identifier for this provider (e.g. 'builtin', 'honcho', 'hindsight')."""

    # -- Core lifecycle (implement these) ------------------------------------

    @abstractmethod
    def is_available(self) -> bool:
        """Report readiness: config present, credentials set, deps installed.

        Consulted during agent init to decide whether to activate the
        provider.  Must stay cheap — no network traffic, local checks only.
        """

    @abstractmethod
    def initialize(self, session_id: str, **kwargs) -> None:
        """Set up the provider for a session (resources, connections, threads).

        Guaranteed kwargs:
            hermes_home (str): the active HERMES_HOME directory — use it for
                profile-scoped storage instead of hardcoding ``~/.hermes``.
            platform (str): "cli", "telegram", "discord", "cron", etc.

        Possible kwargs:
            agent_context (str): "primary", "subagent", "cron", or "flush";
                skip writes for non-primary contexts (cron system prompts
                would corrupt user representations).
            agent_identity (str): profile name (e.g. "coder") for per-profile
                identity scoping.
            agent_workspace (str): shared workspace name (e.g. "hermes").
            parent_session_id (str): the parent's session_id, for subagents.
            user_id (str): platform user identifier (gateway sessions).
        """

    def system_prompt_block(self) -> str:
        """Static provider text for the system prompt; "" to contribute nothing.

        Only STATIC info (instructions, status) belongs here — prefetched
        recall context flows through prefetch() instead.
        """
        return ""

    def prefetch(self, query: str, *, session_id: str = "") -> str:
        """Return recall context for the upcoming turn, or "" if none.

        Runs before each API call, so it must be fast: do the heavy recall
        on a background thread and hand back cached results here.
        ``session_id`` scopes providers serving concurrent sessions
        (gateway group chats, cached agents); ignore it otherwise.
        """
        return ""

    def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
        """Kick off a background recall whose result feeds the NEXT prefetch().

        Invoked after each completed turn.  Default is a no-op — override
        in providers that do background prefetching.
        """

    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
        """Persist one completed turn.

        Must not block the agent loop — queue for background processing
        when the backend has latency.
        """

    @abstractmethod
    def get_tool_schemas(self) -> List[Dict[str, Any]]:
        """Tool schemas to expose to the model.

        OpenAI function-calling format:
            {"name": "...", "description": "...", "parameters": {...}}
        Return an empty list for context-only providers.
        """

    def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> str:
        """Execute one of this provider's tools; must return a JSON string.

        Only invoked for names advertised by get_tool_schemas().
        """
        raise NotImplementedError(f"Provider {self.name} does not handle tool {tool_name}")

    def shutdown(self) -> None:
        """Flush queues and close connections for a clean exit."""

    # -- Optional hooks (override to opt in) ---------------------------------

    def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None:
        """Per-turn tick carrying the user message.

        Useful for turn counting, scope management and periodic maintenance.
        kwargs may include remaining_tokens, model, platform, tool_count —
        take what you need and ignore the rest.
        """

    def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
        """Session-boundary hook — NOT called after every turn.

        Fires only at real boundaries (CLI exit, /reset, gateway session
        expiry).  ``messages`` is the full conversation history; a good
        place for end-of-session fact extraction or summarization.
        """

    def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
        """Inspect messages about to be compressed away.

        ``messages`` is the list that will be summarized/discarded.  Return
        text to fold into the compression summary prompt so provider-extracted
        insights survive; "" (the backwards-compatible default) contributes
        nothing.
        """
        return ""

    def on_delegation(self, task: str, result: str, *,
                      child_session_id: str = "", **kwargs) -> None:
        """Parent-side observation of a completed subagent run.

        task: the delegation prompt; result: the subagent's final response;
        child_session_id: the subagent's session_id.  The subagent itself
        has no provider session (skip_memory=True).
        """

    def get_config_schema(self) -> List[Dict[str, Any]]:
        """Describe config fields for the 'hermes memory setup' wizard.

        Each field dict may carry: key, description, secret (True routes the
        value to .env), required, default, choices, url, and env_var (explicit
        env var name for secrets; auto-generated otherwise).  Return an empty
        list when no configuration is needed (e.g. local-only providers).
        """
        return []

    def save_config(self, values: Dict[str, Any], hermes_home: str) -> None:
        """Persist non-secret setup values to the provider's native config.

        Called by 'hermes memory setup' after collecting inputs; ``values``
        holds only non-secret fields (secrets go to .env).  ``hermes_home``
        is the active HERMES_HOME path.  Providers with native config files
        (JSON, YAML) should override this; env-var-only providers may keep
        the default no-op, provided every get_config_schema() field sets
        ``env_var``.
        """

    def on_memory_write(self, action: str, target: str, content: str) -> None:
        """Mirror a built-in memory tool write to this backend.

        action: 'add', 'replace', or 'remove';
        target: 'memory' or 'user';
        content: the entry content.
        """

View File

@@ -187,29 +187,7 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (
# Model name substrings that trigger tool-use enforcement guidance.
# Add new patterns here when a model family needs explicit steering.
TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma")
# Gemini/Gemma-specific operational guidance, adapted from OpenCode's gemini.txt.
# Injected alongside TOOL_USE_ENFORCEMENT_GUIDANCE when the model is Gemini or Gemma.
GOOGLE_MODEL_OPERATIONAL_GUIDANCE = (
"# Google model operational directives\n"
"Follow these operational rules strictly:\n"
"- **Absolute paths:** Always construct and use absolute file paths for all "
"file system operations. Combine the project root with relative paths.\n"
"- **Verify first:** Use read_file/search_files to check file contents and "
"project structure before making changes. Never guess at file contents.\n"
"- **Dependency checks:** Never assume a library is available. Check "
"package.json, requirements.txt, Cargo.toml, etc. before importing.\n"
"- **Conciseness:** Keep explanatory text brief — a few sentences, not "
"paragraphs. Focus on actions and results over narration.\n"
"- **Parallel tool calls:** When you need to perform multiple independent "
"operations (e.g. reading several files), make all the tool calls in a "
"single response rather than sequentially.\n"
"- **Non-interactive commands:** Use flags like -y, --yes, --non-interactive "
"to prevent CLI tools from hanging on prompts.\n"
"- **Keep going:** Work autonomously until the task is fully resolved. "
"Don't stop with a plan — execute it.\n"
)
TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex")
# Model name substrings that should use the 'developer' role instead of
# 'system' for the system prompt. OpenAI's newer models (GPT-5, Codex)
@@ -674,73 +652,6 @@ def build_skills_system_prompt(
return result
def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -> str:
    """Build a compact Nous subscription capability block for the system prompt.

    Returns "" when the subscription helpers cannot be imported, managed
    Nous tools are disabled, or none of the subscription-relevant tools are
    among ``valid_tool_names`` (an empty/None filter means "no filtering").
    """
    try:
        from hermes_cli.nous_subscription import get_nous_subscription_features
        from tools.tool_backend_helpers import managed_nous_tools_enabled
    except Exception as exc:
        logger.debug("Failed to import Nous subscription helper: %s", exc)
        return ""
    if not managed_nous_tools_enabled():
        return ""

    # Only surface the block when at least one subscription-relevant tool is
    # actually exposed to the model.
    subscription_tools = {
        "web_search",
        "web_extract",
        "browser_navigate",
        "browser_snapshot",
        "browser_click",
        "browser_type",
        "browser_scroll",
        "browser_console",
        "browser_close",
        "browser_press",
        "browser_get_images",
        "browser_vision",
        "image_generate",
        "text_to_speech",
        "terminal",
        "process",
        "execute_code",
    }
    requested = set(valid_tool_names or set())
    if requested and requested.isdisjoint(subscription_tools):
        return ""

    features = get_nous_subscription_features()

    def _status_line(feature) -> str:
        # One "- <label>: <status>" line per capability, most-specific first.
        label = feature.label
        if feature.managed_by_nous:
            return f"- {label}: active via Nous subscription"
        if feature.active:
            current = feature.current_provider or "configured provider"
            return f"- {label}: currently using {current}"
        if features.nous_auth_present:
            if feature.included_by_default:
                return f"- {label}: included with Nous subscription, not currently selected"
            if feature.key == "modal":
                return f"- {label}: optional via Nous subscription"
        return f"- {label}: not currently available"

    lines = [
        "# Nous Subscription",
        "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browserbase) by default. Modal execution is optional.",
        "Current capability status:",
    ]
    lines += [_status_line(feature) for feature in features.items()]
    lines += [
        "When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browserbase API keys.",
        "If the user is not subscribed and asks for a capability that Nous subscription would unlock or simplify, suggest Nous subscription as one option alongside direct setup or local alternatives.",
        "Do not mention subscription unless the user asks about it or it directly solves the current missing capability.",
        "Useful commands: hermes setup, hermes setup tools, hermes setup terminal, hermes status.",
    ]
    return "\n".join(lines)
# =========================================================================
# Context files (SOUL.md, AGENTS.md, .cursorrules)
# =========================================================================

View File

@@ -6,8 +6,6 @@ import os
import re
from typing import Any, Dict, Optional
from utils import is_truthy_value
_COMPLEX_KEYWORDS = {
"debug",
"debugging",
@@ -49,7 +47,13 @@ _URL_RE = re.compile(r"https?://|www\.", re.IGNORECASE)
def _coerce_bool(value: Any, default: bool = False) -> bool:
return is_truthy_value(value, default=default)
if value is None:
return default
if isinstance(value, bool):
return value
if isinstance(value, str):
return value.strip().lower() in {"1", "true", "yes", "on"}
return bool(value)
def _coerce_int(value: Any, default: int) -> int:

171
cli.py
View File

@@ -508,8 +508,6 @@ from tools.browser_tool import _emergency_cleanup_all_sessions as _cleanup_all_b
# Guard to prevent cleanup from running multiple times on exit
_cleanup_done = False
# Weak reference to the active AIAgent for memory provider shutdown at exit
_active_agent_ref = None
def _run_cleanup():
"""Run resource cleanup exactly once."""
@@ -538,15 +536,6 @@ def _run_cleanup():
shutdown_cached_clients()
except Exception:
pass
# Shut down memory provider (on_session_end + shutdown_all) at actual
# session boundary — NOT per-turn inside run_conversation().
try:
if _active_agent_ref and hasattr(_active_agent_ref, 'shutdown_memory_provider'):
_active_agent_ref.shutdown_memory_provider(
getattr(_active_agent_ref, 'conversation_history', None) or []
)
except Exception:
pass
# =============================================================================
@@ -841,63 +830,6 @@ def _cprint(text: str):
_pt_print(_PT_ANSI(text))
# ---------------------------------------------------------------------------
# File-drop detection — extracted as a pure function for testability.
# ---------------------------------------------------------------------------
_IMAGE_EXTENSIONS = frozenset({
'.png', '.jpg', '.jpeg', '.gif', '.webp',
'.bmp', '.tiff', '.tif', '.svg', '.ico',
})
def _detect_file_drop(user_input: str) -> "dict | None":
"""Detect if *user_input* is a dragged/pasted file path, not a slash command.
When a user drags a file into the terminal, macOS pastes the absolute path
(e.g. ``/Users/roland/Desktop/file.png``) which starts with ``/`` and would
otherwise be mistaken for a slash command.
Returns a dict on match::
{
"path": Path, # resolved file path
"is_image": bool, # True when suffix is a known image type
"remainder": str, # any text after the path
}
Returns ``None`` when the input is not a real file path.
"""
if not isinstance(user_input, str) or not user_input.startswith("/"):
return None
# Walk the string absorbing backslash-escaped spaces ("\ ").
raw = user_input
pos = 0
while pos < len(raw):
ch = raw[pos]
if ch == '\\' and pos + 1 < len(raw) and raw[pos + 1] == ' ':
pos += 2 # skip escaped space
elif ch == ' ':
break
else:
pos += 1
first_token_raw = raw[:pos]
first_token = first_token_raw.replace('\\ ', ' ')
drop_path = Path(first_token)
if not drop_path.exists() or not drop_path.is_file():
return None
remainder = raw[pos:].strip()
return {
"path": drop_path,
"is_image": drop_path.suffix.lower() in _IMAGE_EXTENSIONS,
"remainder": remainder,
}
class ChatConsole:
"""Rich Console adapter for prompt_toolkit's patch_stdout context.
@@ -1613,28 +1545,6 @@ class HermesCLI:
pass
return changed
if resolved_provider in {"opencode-zen", "opencode-go"}:
try:
from hermes_cli.models import normalize_opencode_model_id, opencode_model_api_mode
canonical = normalize_opencode_model_id(resolved_provider, current_model)
if canonical and canonical != current_model:
if not self._model_is_default:
self.console.print(
f"[yellow]⚠️ Stripped provider prefix from '{current_model}'; using '{canonical}' for {resolved_provider}.[/]"
)
self.model = canonical
current_model = canonical
changed = True
resolved_mode = opencode_model_api_mode(resolved_provider, current_model)
if resolved_mode != self.api_mode:
self.api_mode = resolved_mode
changed = True
except Exception:
pass
return changed
if resolved_provider != "openai-codex":
return False
@@ -2229,7 +2139,7 @@ class HermesCLI:
session_db=self._session_db,
clarify_callback=self._clarify_callback,
reasoning_callback=self._current_reasoning_callback(),
honcho_session_key=None, # resolved by run_agent via config sessions map / title
fallback_model=self._fallback_model,
thinking_callback=self._on_thinking,
checkpoints_enabled=self.checkpoints_enabled,
@@ -2241,9 +2151,6 @@ class HermesCLI:
stream_delta_callback=self._stream_delta if self.streaming_enabled else None,
tool_gen_callback=self._on_tool_gen_start if self.streaming_enabled else None,
)
# Store reference for atexit memory provider shutdown
global _active_agent_ref
_active_agent_ref = self.agent
# Route agent status output through prompt_toolkit so ANSI escape
# sequences aren't garbled by patch_stdout's StdoutProxy (#2262).
self.agent._print_fn = _cprint
@@ -3251,9 +3158,6 @@ class HermesCLI:
def reset_conversation(self):
"""Reset the conversation by starting a new session."""
# Shut down memory provider before resetting — actual session boundary
if hasattr(self, 'agent') and self.agent:
self.agent.shutdown_memory_provider(self.conversation_history)
self.new_session()
def save_conversation(self):
@@ -3918,6 +3822,28 @@ class HermesCLI:
try:
if self._session_db.set_session_title(self.session_id, new_title):
_cprint(f" Session title set: {new_title}")
# Re-map Honcho session key to new title
if self.agent and getattr(self.agent, '_honcho', None):
try:
hcfg = self.agent._honcho_config
new_key = (
hcfg.resolve_session_name(
session_title=new_title,
session_id=self.agent.session_id,
)
if hcfg else new_title
)
if new_key and new_key != self.agent._honcho_session_key:
old_key = self.agent._honcho_session_key
self.agent._honcho.get_or_create(new_key)
self.agent._honcho_session_key = new_key
from tools.honcho_tools import set_session_context
set_session_context(self.agent._honcho, new_key)
from agent.display import honcho_session_line, write_tty
write_tty(honcho_session_line(hcfg.workspace_id, new_key) + "\n")
_cprint(f" Honcho session: {old_key}{new_key}")
except Exception:
pass
else:
_cprint(" Session not found in database.")
except ValueError as e:
@@ -4382,6 +4308,7 @@ class HermesCLI:
user_message=btw_prompt,
conversation_history=history_snapshot,
task_id=task_id,
sync_honcho=False,
)
response = (result.get("final_response") or "") if result else ""
@@ -4811,7 +4738,12 @@ class HermesCLI:
f" ✅ Compressed: {original_count}{new_count} messages "
f"(~{approx_tokens:,} → ~{new_tokens:,} tokens)"
)
# Flush Honcho async queue so queued messages land before context resets
if self.agent and getattr(self.agent, '_honcho', None):
try:
self.agent._honcho.flush_all()
except Exception:
pass
except Exception as e:
print(f" ❌ Compression failed: {e}")
@@ -6472,6 +6404,17 @@ class HermesCLI:
# One-line Honcho session indicator (TTY-only, not captured by agent).
# Only show when the user explicitly configured Honcho for Hermes
# (not auto-enabled from a stray HONCHO_API_KEY env var).
try:
from honcho_integration.client import HonchoClientConfig
from agent.display import honcho_session_line, write_tty
hcfg = HonchoClientConfig.from_global_config()
if hcfg.enabled and (hcfg.api_key or hcfg.base_url) and hcfg.explicitly_configured:
sname = hcfg.resolve_session_name(session_id=self.session_id)
if sname:
write_tty(honcho_session_line(hcfg.workspace_id, sname) + "\n")
except Exception:
pass
# If resuming a session, load history and display it immediately
# so the user has context before typing their first message.
if self._resumed:
@@ -7612,24 +7555,8 @@ class HermesCLI:
if isinstance(user_input, tuple):
user_input, submit_images = user_input
# Check for commands — but detect dragged/pasted file paths first.
# See _detect_file_drop() for details.
_file_drop = _detect_file_drop(user_input) if isinstance(user_input, str) else None
if _file_drop:
_drop_path = _file_drop["path"]
_remainder = _file_drop["remainder"]
if _file_drop["is_image"]:
submit_images.append(_drop_path)
user_input = _remainder or f"[User attached image: {_drop_path.name}]"
_cprint(f" 📎 Auto-attached image: {_drop_path.name}")
else:
_cprint(f" 📄 Detected file: {_drop_path.name}")
user_input = (
f"[User attached file: {_drop_path}]"
+ (f"\n{_remainder}" if _remainder else "")
)
if not _file_drop and isinstance(user_input, str) and user_input.startswith("/"):
# Check for commands
if isinstance(user_input, str) and user_input.startswith("/"):
_cprint(f"\n⚙️ {user_input}")
if not self.process_command(user_input):
self._should_exit = True
@@ -7790,6 +7717,12 @@ class HermesCLI:
set_sudo_password_callback(None)
set_approval_callback(None)
set_secret_capture_callback(None)
# Flush + shut down Honcho async writer (drains queue before exit)
if self.agent and getattr(self.agent, '_honcho', None):
try:
self.agent._honcho.shutdown()
except (Exception, KeyboardInterrupt):
pass
# Close session in SQLite
if hasattr(self, '_session_db') and self._session_db and self.agent:
try:
@@ -8014,12 +7947,6 @@ def main(
if response:
print(response)
print(f"\nsession_id: {cli.session_id}")
# Ensure proper exit code for automation wrappers
sys.exit(1 if isinstance(result, dict) and result.get("failed") else 0)
# Exit with error code if credentials or agent init fails
sys.exit(1)
else:
cli.show_banner()
cli.console.print(f"[bold blue]Query:[/] {query}")

View File

@@ -437,7 +437,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
provider_sort=pr.get("sort"),
disabled_toolsets=["cronjob", "messaging", "clarify"],
quiet_mode=True,
skip_memory=True, # Cron system prompts would corrupt user representations
platform="cron",
session_id=_cron_session_id,
session_db=_session_db,

View File

@@ -193,6 +193,10 @@ class HermesAgentLoop:
import time as _time
prompt_token_ids = None
generation_token_ids = None
generation_log_probs = None
for turn in range(self.max_turns):
turn_start = _time.monotonic()
@@ -246,6 +250,12 @@ class HermesAgentLoop:
)
assistant_msg = response.choices[0].message
if hasattr(assistant_msg, "prompt_token_ids"):
prompt_token_ids = assistant_msg.prompt_token_ids
if hasattr(assistant_msg, "generation_token_ids"):
generation_token_ids = assistant_msg.generation_token_ids
if hasattr(assistant_msg, "generation_log_probs"):
generation_log_probs = assistant_msg.generation_log_probs
# Extract reasoning content from the response (all provider formats)
reasoning = _extract_reasoning_from_message(assistant_msg)
@@ -308,7 +318,10 @@ class HermesAgentLoop:
"content": assistant_msg.content or "",
"tool_calls": [_tc_to_dict(tc) for tc in assistant_msg.tool_calls],
}
if prompt_token_ids is not None:
msg_dict["prompt_token_ids"] = prompt_token_ids
msg_dict["generation_token_ids"] = generation_token_ids
msg_dict["generation_log_probs"] = generation_log_probs
# Preserve reasoning_content for multi-turn chat template handling
# (e.g., Kimi-K2's template renders <think> blocks differently
# for history vs. the latest turn based on this field)
@@ -471,6 +484,10 @@ class HermesAgentLoop:
}
if reasoning:
msg_dict["reasoning_content"] = reasoning
if prompt_token_ids is not None:
msg_dict["prompt_token_ids"] = prompt_token_ids
msg_dict["generation_token_ids"] = generation_token_ids
msg_dict["generation_log_probs"] = generation_log_probs
messages.append(msg_dict)
turn_elapsed = _time.monotonic() - turn_start

View File

@@ -0,0 +1,144 @@
#!/usr/bin/env python3
"""
Quick compatibility check: connect to a local OpenAI-compatible endpoint
and run a single agent turn via HermesAgentLoop with all standard tools.
Usage:
python environments/check_gym_compat.py # auto-detect model
python environments/check_gym_compat.py --model my-model # explicit model
python environments/check_gym_compat.py --base-url http://... --model ...
"""
import asyncio
import argparse
import json
import logging
import sys
from pathlib import Path
# Ensure repo root is on sys.path when run as a standalone script
_repo_root = str(Path(__file__).resolve().parent.parent)
if _repo_root not in sys.path:
sys.path.insert(0, _repo_root)
import requests
from openai import AsyncOpenAI
from environments.agent_loop import HermesAgentLoop, AgentResult
from model_tools import get_tool_definitions
logging.basicConfig(level=logging.INFO, format="%(levelname)s %(name)s: %(message)s")
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Thin server wrapper — gives HermesAgentLoop the chat_completion() it wants
# ---------------------------------------------------------------------------
class OpenAIServer:
    """Minimal async server wrapping an OpenAI-compatible endpoint.

    Provides the ``chat_completion()`` coroutine that HermesAgentLoop
    expects from its ``server`` argument.
    """

    def __init__(self, base_url: str, model: str, api_key: str = "dummy"):
        # Default model injected into every request unless the caller
        # passes an explicit "model" kwarg. api_key defaults to "dummy"
        # because local OpenAI-compatible servers usually ignore it.
        self.model = model
        self.client = AsyncOpenAI(base_url=base_url, api_key=api_key)

    async def chat_completion(self, **kwargs):
        """Forward a chat-completion request, defaulting "model" to self.model."""
        kwargs.setdefault("model", self.model)
        return await self.client.chat.completions.create(**kwargs)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def detect_model(base_url: str) -> str:
    """Query the endpoint's /models route and return the first model id.

    Falls back to ``"default"`` (with a console message) when the request
    fails, the response is malformed, or no models are listed.
    """
    try:
        response = requests.get(f"{base_url}/models", timeout=10)
        response.raise_for_status()
        listed = response.json().get("data", [])
        if listed:
            chosen = listed[0]["id"]
            print(f"Auto-detected model: {chosen}")
            return chosen
        print("WARNING: /v1/models returned no models")
        return "default"
    except Exception as e:
        # Broad catch on purpose: any failure means "can't auto-detect".
        print(f"Could not auto-detect model ({e}), falling back to 'default'")
        return "default"
async def run_check(base_url: str, model: str, message: str) -> AgentResult:
    """Run a single agent conversation against *base_url* with all hermes tools."""
    # All default hermes tool schemas, plus the set of callable tool names.
    schemas = get_tool_definitions(quiet_mode=False)
    names = set()
    for schema in schemas:
        names.add(schema["function"]["name"])

    loop = HermesAgentLoop(
        server=OpenAIServer(base_url=base_url, model=model),
        tool_schemas=schemas,
        valid_tool_names=names,
        max_turns=5,
    )
    system_msg = {"role": "system", "content": "You are a helpful assistant with access to tools."}
    user_msg = {"role": "user", "content": message}
    return await loop.run([system_msg, user_msg])
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main():
    """Parse CLI args, run one agent turn, and print a pass/fail summary.

    Exits with status 1 when the check raises; otherwise returns normally
    (even when the agent merely hit max turns — that is reported as a
    warning, not a failure).
    """
    parser = argparse.ArgumentParser(description="Check gym endpoint compatibility")
    parser.add_argument("--base-url", default="http://127.0.0.1:11746/v1")
    parser.add_argument("--model", default=None)
    parser.add_argument("--message", default="Hello! What's the current directory you're in?")
    args = parser.parse_args()

    model = args.model or detect_model(args.base_url)

    print(f"\n{'='*60}")
    print(f"Endpoint: {args.base_url}")
    print(f"Model: {model}")
    print(f"Message: {args.message}")
    print(f"{'='*60}\n")

    try:
        result = asyncio.run(run_check(args.base_url, model, args.message))

        print(f"\n{'='*60}")
        print(f"Turns used: {result.turns_used}")
        print(f"Finished naturally: {result.finished_naturally}")
        print(f"Tool errors: {len(result.tool_errors)}")
        print(f"{'='*60}")

        # Print only the final assistant response, as the section comment
        # promised — the leftover debug loop dumped every message and the
        # intended implementation was commented out.
        for msg in reversed(result.messages):
            if msg.get("role") == "assistant" and msg.get("content"):
                print("\nRESPONSE:")
                print(msg["content"])
                break

        if result.tool_errors:
            print("\nTOOL ERRORS:")
            for err in result.tool_errors:
                print(f"  turn {err.turn}: {err.tool_name}{err.error}")

        status = "✅ passed" if result.finished_naturally else "⚠️ hit max turns"
        print(f"\nGym compatibility check {status}")
    except Exception as e:
        print(f"\n❌ Gym compatibility check failed: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()

View File

@@ -11,11 +11,11 @@ Solution:
_AsyncWorker thread internally, making it safe for both CLI and Atropos use.
No monkey-patching is required.
This module is kept for backward compatibility. apply_patches() is a no-op.
This module is kept for backward compatibility apply_patches() is now a no-op.
Usage:
Call apply_patches() once at import time (done automatically by hermes_base_env.py).
This is idempotent and safe to call multiple times.
This is idempotent — calling it multiple times is safe.
"""
import logging
@@ -26,10 +26,17 @@ _patches_applied = False
def apply_patches():
"""Apply all monkey patches needed for Atropos compatibility."""
"""Apply all monkey patches needed for Atropos compatibility.
Now a no-op — Modal async safety is built directly into ModalEnvironment.
Safe to call multiple times.
"""
global _patches_applied
if _patches_applied:
return
logger.debug("apply_patches() called; no patches needed (async safety is built-in)")
# Modal async-safety is now built into tools/environments/modal.py
# via the _AsyncWorker class. No monkey-patching needed.
logger.debug("apply_patches() called — no patches needed (async safety is built-in)")
_patches_applied = True

View File

@@ -17,7 +17,6 @@ from typing import Dict, List, Optional, Any
from enum import Enum
from hermes_cli.config import get_hermes_home
from utils import is_truthy_value
logger = logging.getLogger(__name__)
@@ -26,6 +25,10 @@ def _coerce_bool(value: Any, default: bool = True) -> bool:
"""Coerce bool-ish config values, preserving a caller-provided default."""
if value is None:
return default
if isinstance(value, bool):
return value
if isinstance(value, int):
return value != 0
if isinstance(value, str):
lowered = value.strip().lower()
if lowered in ("true", "1", "yes", "on"):
@@ -33,7 +36,7 @@ def _coerce_bool(value: Any, default: bool = True) -> bool:
if lowered in ("false", "0", "no", "off"):
return False
return default
return is_truthy_value(value, default=default)
return default
def _normalize_unauthorized_dm_behavior(value: Any, default: str = "pair") -> str:
@@ -905,3 +908,5 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
config.default_reset_policy.at_hour = int(reset_hour)
except ValueError:
pass

View File

@@ -323,18 +323,7 @@ class SlackAdapter(BasePlatformAdapter):
Prefers metadata thread_id (the thread parent's ts, set by the
gateway) over reply_to (which may be a child message's ts).
When ``reply_in_thread`` is ``false`` in the platform extra config,
top-level channel messages receive direct channel replies instead of
thread replies. Messages that originate inside an existing thread are
always replied to in-thread to preserve conversation context.
"""
# When reply_in_thread is disabled (default: True for backward compat),
# only thread messages that are already part of an existing thread.
if not self.config.extra.get("reply_in_thread", True):
existing_thread = (metadata or {}).get("thread_id") or (metadata or {}).get("thread_ts")
return existing_thread or None
if metadata:
if metadata.get("thread_id"):
return metadata["thread_id"]

View File

@@ -474,6 +474,8 @@ class GatewayRunner:
# Persistent Honcho managers keyed by gateway session key.
# This preserves write_frequency="session" semantics across short-lived
# per-message AIAgent instances.
self._honcho_managers: Dict[str, Any] = {}
self._honcho_configs: Dict[str, Any] = {}
@@ -506,9 +508,61 @@ class GatewayRunner:
# Track background tasks to prevent garbage collection mid-execution
self._background_tasks: set = set()
def _get_or_create_gateway_honcho(self, session_key: str):
    """Return a persistent Honcho manager/config pair for this gateway session.

    Managers are cached per gateway session key so they survive across
    short-lived per-message AIAgent instances. Returns ``(manager, config)``;
    ``(None, config)`` when Honcho is disabled or lacks credentials, and
    ``(None, None)`` when initialization raises.
    """
    # Defensive: ensure the caches exist even if __init__ didn't create them.
    if not hasattr(self, "_honcho_managers"):
        self._honcho_managers = {}
    if not hasattr(self, "_honcho_configs"):
        self._honcho_configs = {}
    # Cache hit: reuse the session's existing manager/config pair.
    if session_key in self._honcho_managers:
        return self._honcho_managers[session_key], self._honcho_configs.get(session_key)
    try:
        from honcho_integration.client import HonchoClientConfig, get_honcho_client
        from honcho_integration.session import HonchoSessionManager

        hcfg = HonchoClientConfig.from_global_config()
        # Honcho must be enabled AND have either an API key or a base URL.
        if not hcfg.enabled or not (hcfg.api_key or hcfg.base_url):
            return None, hcfg
        client = get_honcho_client(hcfg)
        manager = HonchoSessionManager(
            honcho=client,
            config=hcfg,
            context_tokens=hcfg.context_tokens,
        )
        self._honcho_managers[session_key] = manager
        self._honcho_configs[session_key] = hcfg
        return manager, hcfg
    except Exception as e:
        # Honcho is best-effort: log at debug and degrade to no manager.
        logger.debug("Gateway Honcho init failed for %s: %s", session_key, e)
        return None, None
def _shutdown_gateway_honcho(self, session_key: str) -> None:
    """Flush and close the persistent Honcho manager for a gateway session.

    Removes the session's manager and config from the caches, then calls the
    manager's ``shutdown()``. Failures are logged at debug level and
    swallowed; no-op when the caches or the session's manager don't exist.
    """
    managers = getattr(self, "_honcho_managers", None)
    configs = getattr(self, "_honcho_configs", None)
    if managers is None or configs is None:
        return
    # Pop both entries first so a repeated call can't double-shutdown.
    manager = managers.pop(session_key, None)
    configs.pop(session_key, None)
    if not manager:
        return
    try:
        manager.shutdown()
    except Exception as e:
        logger.debug("Gateway Honcho shutdown failed for %s: %s", session_key, e)
def _shutdown_all_gateway_honcho(self) -> None:
"""Flush and close all persistent Honcho managers."""
managers = getattr(self, "_honcho_managers", None)
if not managers:
return
for session_key in list(managers.keys()):
self._shutdown_gateway_honcho(session_key)
# -- Setup skill availability ----------------------------------------
def _has_setup_skill(self) -> bool:
@@ -573,6 +627,7 @@ class GatewayRunner:
def _flush_memories_for_session(
self,
old_session_id: str,
honcho_session_key: Optional[str] = None,
):
"""Prompt the agent to save memories/skills before context is lost.
@@ -605,9 +660,9 @@ class GatewayRunner:
model=model,
max_iterations=8,
quiet_mode=True,
skip_memory=True, # Flush agent — no memory provider
enabled_toolsets=["memory", "skills"],
session_id=old_session_id,
honcho_session_key=honcho_session_key,
)
# Fully silence the flush agent — quiet_mode only suppresses init
# messages; tool call output still leaks to the terminal through
@@ -670,14 +725,22 @@ class GatewayRunner:
tmp_agent.run_conversation(
user_message=flush_prompt,
conversation_history=msgs,
sync_honcho=False,
)
logger.info("Pre-reset memory flush completed for session %s", old_session_id)
# Flush any queued Honcho writes before the session is dropped
if getattr(tmp_agent, '_honcho', None):
try:
tmp_agent._honcho.shutdown()
except Exception:
pass
except Exception as e:
logger.debug("Pre-reset memory flush failed for session %s: %s", old_session_id, e)
async def _async_flush_memories(
self,
old_session_id: str,
honcho_session_key: Optional[str] = None,
):
"""Run the sync memory flush in a thread pool so it won't block the event loop."""
loop = asyncio.get_event_loop()
@@ -685,6 +748,7 @@ class GatewayRunner:
None,
self._flush_memories_for_session,
old_session_id,
honcho_session_key,
)
@property
@@ -1227,14 +1291,7 @@ class GatewayRunner:
)
try:
await self._async_flush_memories(entry.session_id, key)
# Shut down memory provider on the cached agent
cached_agent = self._running_agents.get(key)
if cached_agent and cached_agent is not _AGENT_PENDING_SENTINEL:
try:
if hasattr(cached_agent, 'shutdown_memory_provider'):
cached_agent.shutdown_memory_provider()
except Exception:
pass
self._shutdown_gateway_honcho(key)
# Mark as flushed and persist to disk so the flag
# survives gateway restarts.
with self.session_store._lock:
@@ -1368,12 +1425,6 @@ class GatewayRunner:
logger.debug("Interrupted running agent for session %s during shutdown", session_key[:20])
except Exception as e:
logger.debug("Failed interrupting agent during shutdown: %s", e)
# Shut down memory provider at actual session boundary
try:
if hasattr(agent, 'shutdown_memory_provider'):
agent.shutdown_memory_provider()
except Exception:
pass
for platform, adapter in list(self.adapters.items()):
try:
@@ -1395,6 +1446,7 @@ class GatewayRunner:
self._running_agents.clear()
self._pending_messages.clear()
self._pending_approvals.clear()
self._shutdown_all_gateway_honcho()
self._shutdown_event.set()
from gateway.status import remove_pid_file, write_runtime_status
@@ -2397,8 +2449,7 @@ class GatewayRunner:
)
# One-time prompt if no home channel is set for this platform
# Skip for webhooks - they deliver directly to configured targets (github_comment, etc.)
if not history and source.platform and source.platform != Platform.LOCAL and source.platform != Platform.WEBHOOK:
if not history and source.platform and source.platform != Platform.LOCAL:
platform_name = source.platform.value
env_key = f"{platform_name.upper()}_HOME_CHANNEL"
if not os.getenv(env_key):
@@ -2670,12 +2721,27 @@ class GatewayRunner:
except Exception as e:
logger.error("Process watcher setup error: %s", e)
# NOTE: Dangerous command approvals are now handled inline by the
# blocking gateway approval mechanism in tools/approval.py. The agent
# thread blocks until the user responds with /approve or /deny, so by
# the time we reach here the approval has already been resolved. The
# old post-loop pop_pending + approval_hint code was removed in favour
# of the blocking approach that mirrors CLI's synchronous input().
# Check if the agent encountered a dangerous command needing approval
try:
from tools.approval import pop_pending
import time as _time
pending = pop_pending(session_key)
if pending:
pending["timestamp"] = _time.time()
self._pending_approvals[session_key] = pending
# Append structured instructions so the user knows how to respond
cmd_preview = pending.get("command", "")
if len(cmd_preview) > 200:
cmd_preview = cmd_preview[:200] + "..."
approval_hint = (
f"\n\n⚠️ **Dangerous command requires approval:**\n"
f"```\n{cmd_preview}\n```\n"
f"Reply `/approve` to execute, `/approve session` to approve this pattern "
f"for the session, or `/deny` to cancel."
)
response = (response or "") + approval_hint
except Exception as e:
logger.debug("Failed to check pending approvals: %s", e)
# Save the full conversation to the transcript, including tool calls.
# This preserves the complete agent loop (tool_calls, tool results,
@@ -2753,12 +2819,20 @@ class GatewayRunner:
skip_db=agent_persisted,
)
# Token counts and model are now persisted by the agent directly.
# Keep only last_prompt_tokens here for context-window tracking and
# compression decisions.
# Update session with actual prompt token count and model from the agent
self.session_store.update_session(
session_entry.session_key,
input_tokens=agent_result.get("input_tokens", 0),
output_tokens=agent_result.get("output_tokens", 0),
cache_read_tokens=agent_result.get("cache_read_tokens", 0),
cache_write_tokens=agent_result.get("cache_write_tokens", 0),
last_prompt_tokens=agent_result.get("last_prompt_tokens", 0),
model=agent_result.get("model"),
estimated_cost_usd=agent_result.get("estimated_cost_usd"),
cost_status=agent_result.get("cost_status"),
cost_source=agent_result.get("cost_source"),
provider=agent_result.get("provider"),
base_url=agent_result.get("base_url"),
)
# Auto voice reply: send TTS audio before the text response
@@ -2940,6 +3014,8 @@ class GatewayRunner:
_flush_task.add_done_callback(self._background_tasks.discard)
except Exception as e:
logger.debug("Gateway memory flush on reset failed: %s", e)
self._shutdown_gateway_honcho(session_key)
self._evict_cached_agent(session_key)
# Reset the session
@@ -4090,6 +4166,7 @@ class GatewayRunner:
user_message=btw_prompt,
conversation_history=history_snapshot,
task_id=task_id,
sync_honcho=False,
)
loop = asyncio.get_event_loop()
@@ -4471,6 +4548,8 @@ class GatewayRunner:
except Exception as e:
logger.debug("Memory flush on resume failed: %s", e)
self._shutdown_gateway_honcho(session_key)
# Clear any running agent for this session key
if session_key in self._running_agents:
del self._running_agents[session_key]
@@ -4651,93 +4730,123 @@ class GatewayRunner:
_APPROVAL_TIMEOUT_SECONDS = 300 # 5 minutes
async def _handle_approve_command(self, event: MessageEvent) -> Optional[str]:
"""Handle /approve command — unblock waiting agent thread(s).
"""Handle /approve command — execute a pending dangerous command.
The agent thread(s) are blocked inside tools/approval.py waiting for
the user to respond. This handler signals the event so the agent
resumes and the terminal_tool executes the command inline — the same
flow as the CLI's synchronous input() approval.
Supports multiple concurrent approvals (parallel subagents,
execute_code). ``/approve`` resolves the oldest pending command;
``/approve all`` resolves every pending command at once.
After execution, re-invokes the agent with the command result so it
can continue its multi-step task (fixes the "dead agent" bug where
the agent loop exited on approval_required and never resumed).
Usage:
/approve — approve oldest pending command once
/approve all — approve ALL pending commands at once
/approve session — approve oldest + remember for session
/approve all session — approve all + remember for session
/approve always — approve oldest + remember permanently
/approve all always — approve all + remember permanently
/approve — approve and execute the pending command
/approve session — approve and remember for this session
/approve always — approve this pattern permanently
"""
source = event.source
session_key = self._session_key_for_source(source)
from tools.approval import (
resolve_gateway_approval, has_blocking_approval,
pending_approval_count,
)
if not has_blocking_approval(session_key):
if session_key in self._pending_approvals:
self._pending_approvals.pop(session_key)
return "⚠️ Approval expired (agent is no longer waiting). Ask the agent to try again."
if session_key not in self._pending_approvals:
return "No pending command to approve."
# Parse args: support "all", "all session", "all always", "session", "always"
args = event.get_command_args().strip().lower().split()
resolve_all = "all" in args
remaining = [a for a in args if a != "all"]
import time as _time
approval = self._pending_approvals[session_key]
if any(a in ("always", "permanent", "permanently") for a in remaining):
choice = "always"
# Check for timeout
ts = approval.get("timestamp", 0)
if _time.time() - ts > self._APPROVAL_TIMEOUT_SECONDS:
self._pending_approvals.pop(session_key, None)
return "⚠️ Approval expired (timed out after 5 minutes). Ask the agent to try again."
self._pending_approvals.pop(session_key)
cmd = approval["command"]
pattern_keys = approval.get("pattern_keys", [])
if not pattern_keys:
pk = approval.get("pattern_key", "")
pattern_keys = [pk] if pk else []
# Determine approval scope from args
args = event.get_command_args().strip().lower()
from tools.approval import approve_session, approve_permanent
if args in ("always", "permanent", "permanently"):
for pk in pattern_keys:
approve_permanent(pk)
scope_msg = " (pattern approved permanently)"
elif any(a in ("session", "ses") for a in remaining):
choice = "session"
elif args in ("session", "ses"):
for pk in pattern_keys:
approve_session(session_key, pk)
scope_msg = " (pattern approved for this session)"
else:
choice = "once"
# One-time approval — just approve for session so the immediate
# replay works, but don't advertise it as session-wide
for pk in pattern_keys:
approve_session(session_key, pk)
scope_msg = ""
count = resolve_gateway_approval(session_key, choice, resolve_all=resolve_all)
if not count:
return "No pending command to approve."
logger.info("User approved dangerous command via /approve: %s...%s", cmd[:60], scope_msg)
from tools.terminal_tool import terminal_tool
result = await asyncio.to_thread(terminal_tool, command=cmd, force=True)
count_msg = f" ({count} commands)" if count > 1 else ""
logger.info("User approved %d dangerous command(s) via /approve%s", count, scope_msg)
return f"✅ Command{'s' if count > 1 else ''} approved{scope_msg}{count_msg}. The agent is resuming..."
# Send immediate feedback so the user sees the command output right away
immediate_msg = f"✅ Command approved and executed{scope_msg}.\n\n```\n{result[:3500]}\n```"
adapter = self.adapters.get(source.platform)
if adapter:
try:
await adapter.send(source.chat_id, immediate_msg)
except Exception as e:
logger.warning("Failed to send approval feedback: %s", e)
# Re-invoke the agent with the command result so it can continue its task.
# The agent's conversation history (persisted in SQLite) already contains
# the tool call that returned approval_required — the continuation message
# provides the actual execution output so the agent can pick up where it
# left off.
continuation_text = (
f"[System: The user approved the previously blocked command and it has been executed.\n"
f"Command: {cmd}\n"
f"<command_output>\n{result[:3500]}\n</command_output>\n\n"
f"Continue with the task you were working on.]"
)
synthetic_event = MessageEvent(
text=continuation_text,
source=source,
message_id=f"approve-continuation-{uuid.uuid4().hex}",
)
async def _continue_agent():
try:
response = await self._handle_message(synthetic_event)
if response and adapter:
await adapter.send(source.chat_id, response)
except Exception as e:
logger.error("Failed to continue agent after /approve: %s", e)
if adapter:
try:
await adapter.send(
source.chat_id,
f"⚠️ Failed to resume agent after approval: {e}"
)
except Exception:
pass
_task = asyncio.create_task(_continue_agent())
self._background_tasks.add(_task)
_task.add_done_callback(self._background_tasks.discard)
# Return None — we already sent the immediate feedback and the agent
# continuation is running in the background.
return None
async def _handle_deny_command(self, event: MessageEvent) -> str:
    """Handle /deny command — reject pending dangerous command(s).

    Signals blocked agent thread(s) with a 'deny' result so they receive
    a definitive BLOCKED message, same as the CLI deny flow.
    ``/deny`` denies the oldest; ``/deny all`` denies everything.

    Returns:
        A user-facing status string describing what was denied.
    """
    source = event.source
    session_key = self._session_key_for_source(source)
    from tools.approval import (
        resolve_gateway_approval, has_blocking_approval,
    )
    # If no agent thread is actually blocked waiting on an approval, any
    # cached pending entry is stale — drop it rather than acting on it.
    if not has_blocking_approval(session_key):
        if session_key in self._pending_approvals:
            self._pending_approvals.pop(session_key)
        return "❌ Command denied (approval was stale)."
    # "/deny all" rejects every queued command; plain "/deny" only the oldest.
    args = event.get_command_args().strip().lower()
    resolve_all = "all" in args
    count = resolve_gateway_approval(session_key, "deny", resolve_all=resolve_all)
    if not count:
        return "No pending command to deny."
    count_msg = f" ({count} commands)" if count > 1 else ""
    logger.info("User denied %d dangerous command(s) via /deny", count)
    return f"❌ Command{'s' if count > 1 else ''} denied{count_msg}."
async def _handle_update_command(self, event: MessageEvent) -> str:
"""Handle /update command — update Hermes Agent to the latest version.
@@ -5300,10 +5409,7 @@ class GatewayRunner:
or os.getenv("HERMES_TOOL_PROGRESS_MODE")
or "all"
)
# Disable tool progress for webhooks - they don't support message editing,
# so each progress line would be sent as a separate message.
from gateway.config import Platform
tool_progress_enabled = progress_mode != "off" and source.platform != Platform.WEBHOOK
tool_progress_enabled = progress_mode != "off"
# Queue for progress messages (thread-safe)
progress_queue = queue.Queue() if tool_progress_enabled else None
@@ -5542,6 +5648,7 @@ class GatewayRunner:
}
pr = self._provider_routing
honcho_manager, honcho_config = self._get_or_create_gateway_honcho(session_key)
reasoning_config = self._load_reasoning_config()
self._reasoning_config = reasoning_config
# Set up streaming consumer if enabled
@@ -5614,6 +5721,9 @@ class GatewayRunner:
provider_data_collection=pr.get("data_collection"),
session_id=session_id,
platform=platform_key,
honcho_session_key=session_key,
honcho_manager=honcho_manager,
honcho_config=honcho_config,
session_db=self._session_db,
fallback_model=self._fallback_model,
)
@@ -5719,42 +5829,7 @@ class GatewayRunner:
if _p:
_history_media_paths.add(_p)
# Register per-session gateway approval callback so dangerous
# command approval blocks the agent thread (mirrors CLI input()).
# The callback bridges sync→async to send the approval request
# to the user immediately.
from tools.approval import register_gateway_notify, unregister_gateway_notify
def _approval_notify_sync(approval_data: dict) -> None:
"""Send the approval request to the user from the agent thread."""
cmd = approval_data.get("command", "")
cmd_preview = cmd[:200] + "..." if len(cmd) > 200 else cmd
desc = approval_data.get("description", "dangerous command")
msg = (
f"⚠️ **Dangerous command requires approval:**\n"
f"```\n{cmd_preview}\n```\n"
f"Reason: {desc}\n\n"
f"Reply `/approve` to execute, `/approve session` to approve this pattern "
f"for the session, `/approve always` to approve permanently, or `/deny` to cancel."
)
try:
asyncio.run_coroutine_threadsafe(
_status_adapter.send(
_status_chat_id,
msg,
metadata=_status_thread_metadata,
),
_loop_for_step,
).result(timeout=15)
except Exception as _e:
logger.error("Failed to send approval request: %s", _e)
_approval_session_key = session_key or ""
register_gateway_notify(_approval_session_key, _approval_notify_sync)
try:
result = agent.run_conversation(message, conversation_history=agent_history, task_id=session_id)
finally:
unregister_gateway_notify(_approval_session_key)
result = agent.run_conversation(message, conversation_history=agent_history, task_id=session_id)
result_holder[0] = result
# Signal the stream consumer that the agent is done

View File

@@ -738,58 +738,71 @@ class SessionStore:
except Exception as e:
print(f"[gateway] Warning: Failed to create SQLite session: {e}")
# Seed new DM thread sessions with parent DM session history.
# When a bot reply creates a Slack thread and the user responds in it,
# the thread gets a new session (keyed by thread_ts). Without seeding,
# the thread session starts with zero context — the user's original
# question and the bot's answer are invisible. Fix: copy the parent
# DM session's transcript into the new thread session so context carries
# over while still keeping threads isolated from each other.
if (
source.chat_type == "dm"
and source.thread_id
and entry.created_at == entry.updated_at # brand-new session
and not was_auto_reset
):
parent_source = SessionSource(
platform=source.platform,
chat_id=source.chat_id,
chat_type="dm",
user_id=source.user_id,
# no thread_id — this is the parent DM session
)
parent_key = self._generate_session_key(parent_source)
with self._lock:
parent_entry = self._entries.get(parent_key)
if parent_entry and parent_entry.session_id != entry.session_id:
try:
parent_history = self.load_transcript(parent_entry.session_id)
if parent_history:
self.rewrite_transcript(entry.session_id, parent_history)
logger.info(
"[Session] Seeded DM thread session %s with %d messages from parent %s",
entry.session_id, len(parent_history), parent_entry.session_id,
)
except Exception as e:
logger.warning("[Session] Failed to seed thread session: %s", e)
return entry
def update_session(
    self,
    session_key: str,
    input_tokens: int = 0,
    output_tokens: int = 0,
    cache_read_tokens: int = 0,
    cache_write_tokens: int = 0,
    last_prompt_tokens: Optional[int] = None,
    model: Optional[str] = None,
    estimated_cost_usd: Optional[float] = None,
    cost_status: Optional[str] = None,
    cost_source: Optional[str] = None,
    provider: Optional[str] = None,
    base_url: Optional[str] = None,
) -> None:
    """Update a session's metadata after an interaction.

    Token arguments are cumulative totals (the gateway receives running
    totals from the cached agent, not per-call deltas), so they are
    assigned directly rather than accumulated. Optional cost/billing
    fields are only written when provided. The update is mirrored into
    the SQLite session DB when one is attached; DB failures are logged
    at debug level and never raised to the caller.
    """
    db_session_id = None
    with self._lock:
        self._ensure_loaded_locked()
        if session_key in self._entries:
            entry = self._entries[session_key]
            entry.updated_at = _now()
            # Direct assignment — the gateway receives cumulative totals
            # from the cached agent, not per-call deltas.
            entry.input_tokens = input_tokens
            entry.output_tokens = output_tokens
            entry.cache_read_tokens = cache_read_tokens
            entry.cache_write_tokens = cache_write_tokens
            if last_prompt_tokens is not None:
                entry.last_prompt_tokens = last_prompt_tokens
            if estimated_cost_usd is not None:
                entry.estimated_cost_usd = estimated_cost_usd
            if cost_status:
                entry.cost_status = cost_status
            entry.total_tokens = (
                entry.input_tokens
                + entry.output_tokens
                + entry.cache_read_tokens
                + entry.cache_write_tokens
            )
            self._save()
            db_session_id = entry.session_id
    # Mirror into the session DB outside the lock so slow DB writes
    # don't block other session-store callers.
    if self._db and db_session_id:
        try:
            self._db.set_token_counts(
                db_session_id,
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                cache_read_tokens=cache_read_tokens,
                cache_write_tokens=cache_write_tokens,
                estimated_cost_usd=estimated_cost_usd,
                cost_status=cost_status,
                cost_source=cost_source,
                billing_provider=provider,
                billing_base_url=base_url,
                model=model,
                absolute=True,
            )
        except Exception as e:
            logger.debug("Session DB operation failed: %s", e)
def reset_session(self, session_key: str) -> Optional[SessionEntry]:
"""Force reset a session, creating a new session ID."""

View File

@@ -200,10 +200,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
id="opencode-go",
name="OpenCode Go",
auth_type="api_key",
# OpenCode Go mixes API surfaces by model:
# - GLM / Kimi use OpenAI-compatible chat completions under /v1
# - MiniMax models use Anthropic Messages under /v1/messages
# Keep the provider base at /v1 and select api_mode per-model.
inference_base_url="https://opencode.ai/zen/go/v1",
api_key_env_vars=("OPENCODE_GO_API_KEY",),
base_url_env_var="OPENCODE_GO_BASE_URL",
@@ -1381,89 +1377,6 @@ def _agent_key_is_usable(state: Dict[str, Any], min_ttl_seconds: int) -> bool:
return not _is_expiring(state.get("agent_key_expires_at"), min_ttl_seconds)
def resolve_nous_access_token(
    *,
    timeout_seconds: float = 15.0,
    insecure: Optional[bool] = None,
    ca_bundle: Optional[str] = None,
    refresh_skew_seconds: int = ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
) -> str:
    """Resolve a refresh-aware Nous Portal access token for managed tool gateways.

    Args:
        timeout_seconds: HTTP timeout for the refresh request; falsy values
            fall back to 15.0 seconds.
        insecure: Optional TLS-verification override forwarded to
            ``_resolve_verify``.
        ca_bundle: Optional CA bundle path forwarded to ``_resolve_verify``.
        refresh_skew_seconds: Treat the token as expiring this many seconds
            early so it gets refreshed before it actually lapses.

    Returns:
        A currently-valid access token string.

    Raises:
        AuthError: If there is no Nous login state, no stored access token,
            or the token is expiring and no refresh token is available.
    """
    # The whole read → refresh → persist cycle runs under the auth-store
    # lock so concurrent callers don't race on the saved token state.
    with _auth_store_lock():
        auth_store = _load_auth_store()
        state = _load_provider_state(auth_store, "nous")
        if not state:
            raise AuthError(
                "Hermes is not logged into Nous Portal.",
                provider="nous",
                relogin_required=True,
            )
        # Stored portal URL wins; env vars and the built-in default are
        # fallbacks. Trailing slash is stripped for clean URL joining.
        portal_base_url = (
            _optional_base_url(state.get("portal_base_url"))
            or os.getenv("HERMES_PORTAL_BASE_URL")
            or os.getenv("NOUS_PORTAL_BASE_URL")
            or DEFAULT_NOUS_PORTAL_URL
        ).rstrip("/")
        client_id = str(state.get("client_id") or DEFAULT_NOUS_CLIENT_ID)
        verify = _resolve_verify(insecure=insecure, ca_bundle=ca_bundle, auth_state=state)
        access_token = state.get("access_token")
        refresh_token = state.get("refresh_token")
        if not isinstance(access_token, str) or not access_token:
            raise AuthError(
                "No access token found for Nous Portal login.",
                provider="nous",
                relogin_required=True,
            )
        # Fast path: token is not within the refresh skew of expiry — use it.
        if not _is_expiring(state.get("expires_at"), refresh_skew_seconds):
            return access_token
        if not isinstance(refresh_token, str) or not refresh_token:
            raise AuthError(
                "Session expired and no refresh token is available.",
                provider="nous",
                relogin_required=True,
            )
        timeout = httpx.Timeout(timeout_seconds if timeout_seconds else 15.0)
        with httpx.Client(
            timeout=timeout,
            headers={"Accept": "application/json"},
            verify=verify,
        ) as client:
            refreshed = _refresh_access_token(
                client=client,
                portal_base_url=portal_base_url,
                client_id=client_id,
                refresh_token=refresh_token,
            )
        # Persist the refreshed token plus recomputed expiry metadata so
        # subsequent calls can take the fast path above.
        now = datetime.now(timezone.utc)
        access_ttl = _coerce_ttl_seconds(refreshed.get("expires_in"))
        state["access_token"] = refreshed["access_token"]
        # The portal may rotate the refresh token; keep the old one if not.
        state["refresh_token"] = refreshed.get("refresh_token") or refresh_token
        state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
        state["scope"] = refreshed.get("scope") or state.get("scope")
        state["obtained_at"] = now.isoformat()
        state["expires_in"] = access_ttl
        state["expires_at"] = datetime.fromtimestamp(
            now.timestamp() + access_ttl,
            tz=timezone.utc,
        ).isoformat()
        state["portal_base_url"] = portal_base_url
        state["client_id"] = client_id
        # Record the TLS settings actually used so later calls reuse them.
        state["tls"] = {
            "insecure": verify is False,
            "ca_bundle": verify if isinstance(verify, str) else None,
        }
        _save_provider_state(auth_store, "nous", state)
        _save_auth_store(auth_store)
        return state["access_token"]
def refresh_nous_oauth_pure(
access_token: str,
refresh_token: str,

View File

@@ -22,8 +22,6 @@ import tempfile
from pathlib import Path
from typing import Dict, Any, Optional, List, Tuple
from tools.tool_backend_helpers import managed_nous_tools_enabled as _managed_nous_tools_enabled
_IS_WINDOWS = platform.system() == "Windows"
_ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
# Env var names written to .env that aren't in OPTIONAL_ENV_VARS
@@ -43,6 +41,7 @@ _EXTRA_ENV_KEYS = frozenset({
"MATTERMOST_HOME_CHANNEL", "MATTERMOST_REPLY_MODE",
"MATRIX_PASSWORD", "MATRIX_ENCRYPTION", "MATRIX_HOME_ROOM",
})
import yaml
from hermes_cli.colors import Colors, color
@@ -213,7 +212,6 @@ DEFAULT_CONFIG = {
"terminal": {
"backend": "local",
"modal_mode": "auto",
"cwd": ".", # Use current directory
"timeout": 180,
# Environment variables to pass through to sandboxed execution
@@ -428,11 +426,6 @@ DEFAULT_CONFIG = {
"user_profile_enabled": True,
"memory_char_limit": 2200, # ~800 tokens at 2.75 chars/token
"user_char_limit": 1375, # ~500 tokens at 2.75 chars/token
# External memory provider plugin (empty = built-in only).
# Set to a provider name to activate: "openviking", "mem0",
# "hindsight", "holographic", "retaindb", "byterover".
# Only ONE external provider is allowed at a time.
"provider": "",
},
# Subagent delegation — override the provider:model used by delegate_task
@@ -539,7 +532,6 @@ ENV_VARS_BY_VERSION: Dict[int, List[str]] = {
5: ["WHATSAPP_ENABLED", "WHATSAPP_MODE", "WHATSAPP_ALLOWED_USERS",
"SLACK_BOT_TOKEN", "SLACK_APP_TOKEN", "SLACK_ALLOWED_USERS"],
10: ["TAVILY_API_KEY"],
11: ["TERMINAL_MODAL_MODE"],
}
# Required environment variables with metadata for migration prompts.
@@ -758,38 +750,6 @@ OPTIONAL_ENV_VARS = {
"category": "tool",
"advanced": True,
},
"FIRECRAWL_GATEWAY_URL": {
"description": "Exact Firecrawl tool-gateway origin override for Nous Subscribers only (optional)",
"prompt": "Firecrawl gateway URL (leave empty to derive from domain)",
"url": None,
"password": False,
"category": "tool",
"advanced": True,
},
"TOOL_GATEWAY_DOMAIN": {
"description": "Shared tool-gateway domain suffix for Nous Subscribers only, used to derive vendor hosts, e.g. nousresearch.com -> firecrawl-gateway.nousresearch.com",
"prompt": "Tool-gateway domain suffix",
"url": None,
"password": False,
"category": "tool",
"advanced": True,
},
"TOOL_GATEWAY_SCHEME": {
"description": "Shared tool-gateway URL scheme for Nous Subscribers only, used to derive vendor hosts (`https` by default, set `http` for local gateway testing)",
"prompt": "Tool-gateway URL scheme",
"url": None,
"password": False,
"category": "tool",
"advanced": True,
},
"TOOL_GATEWAY_USER_TOKEN": {
"description": "Explicit Nous Subscriber access token for tool-gateway requests (optional; otherwise read from the Hermes auth store)",
"prompt": "Tool-gateway user token",
"url": None,
"password": True,
"category": "tool",
"advanced": True,
},
"TAVILY_API_KEY": {
"description": "Tavily API key for AI-native web search, extract, and crawl",
"prompt": "Tavily API key",
@@ -1119,15 +1079,6 @@ OPTIONAL_ENV_VARS = {
},
}
if not _managed_nous_tools_enabled():
for _hidden_var in (
"FIRECRAWL_GATEWAY_URL",
"TOOL_GATEWAY_DOMAIN",
"TOOL_GATEWAY_SCHEME",
"TOOL_GATEWAY_USER_TOKEN",
):
OPTIONAL_ENV_VARS.pop(_hidden_var, None)
def get_missing_env_vars(required_only: bool = False) -> List[Dict[str, Any]]:
"""
@@ -2043,9 +1994,7 @@ def set_config_value(key: str, value: str):
# Check if it's an API key (goes to .env)
api_keys = [
'OPENROUTER_API_KEY', 'OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'VOICE_TOOLS_OPENAI_KEY',
'EXA_API_KEY', 'PARALLEL_API_KEY', 'FIRECRAWL_API_KEY', 'FIRECRAWL_API_URL',
'FIRECRAWL_GATEWAY_URL', 'TOOL_GATEWAY_DOMAIN', 'TOOL_GATEWAY_SCHEME',
'TOOL_GATEWAY_USER_TOKEN', 'TAVILY_API_KEY',
'EXA_API_KEY', 'PARALLEL_API_KEY', 'FIRECRAWL_API_KEY', 'FIRECRAWL_API_URL', 'TAVILY_API_KEY',
'BROWSERBASE_API_KEY', 'BROWSERBASE_PROJECT_ID', 'BROWSER_USE_API_KEY',
'FAL_KEY', 'TELEGRAM_BOT_TOKEN', 'DISCORD_BOT_TOKEN',
'TERMINAL_SSH_HOST', 'TERMINAL_SSH_USER', 'TERMINAL_SSH_KEY',
@@ -2101,7 +2050,6 @@ def set_config_value(key: str, value: str):
# config.yaml is authoritative, but terminal_tool only reads TERMINAL_ENV etc.
_config_to_env_sync = {
"terminal.backend": "TERMINAL_ENV",
"terminal.modal_mode": "TERMINAL_MODAL_MODE",
"terminal.docker_image": "TERMINAL_DOCKER_IMAGE",
"terminal.singularity_image": "TERMINAL_SINGULARITY_IMAGE",
"terminal.modal_image": "TERMINAL_MODAL_IMAGE",

View File

@@ -55,7 +55,7 @@ def _has_provider_env_config(content: str) -> bool:
def _honcho_is_configured_for_doctor() -> bool:
"""Return True when Honcho is configured, even if this process has no active session."""
try:
from plugins.memory.honcho.client import HonchoClientConfig
from honcho_integration.client import HonchoClientConfig
cfg = HonchoClientConfig.from_global_config()
return bool(cfg.enabled and (cfg.api_key or cfg.base_url))
@@ -709,19 +709,19 @@ def run_doctor(args):
print(color("◆ Honcho Memory", Colors.CYAN, Colors.BOLD))
try:
from plugins.memory.honcho.client import HonchoClientConfig, resolve_config_path
from honcho_integration.client import HonchoClientConfig, resolve_config_path
hcfg = HonchoClientConfig.from_global_config()
_honcho_cfg_path = resolve_config_path()
if not _honcho_cfg_path.exists():
check_warn("Honcho config not found", "run: hermes memory setup")
check_warn("Honcho config not found", "run: hermes honcho setup")
elif not hcfg.enabled:
check_info(f"Honcho disabled (set enabled: true in {_honcho_cfg_path} to activate)")
elif not (hcfg.api_key or hcfg.base_url):
check_fail("Honcho API key or base URL not set", "run: hermes memory setup")
issues.append("No Honcho API key — run 'hermes memory setup'")
check_fail("Honcho API key or base URL not set", "run: hermes honcho setup")
issues.append("No Honcho API key — run 'hermes honcho setup'")
else:
from plugins.memory.honcho.client import get_honcho_client, reset_honcho_client
from honcho_integration.client import get_honcho_client, reset_honcho_client
reset_honcho_client()
try:
get_honcho_client(hcfg)

View File

@@ -858,10 +858,10 @@ def cmd_setup(args):
def cmd_model(args):
    """Select default model — starts with provider selection, then model picker.

    Thin CLI entry point for ``hermes model``: requires an interactive
    terminal, then delegates to the shared provider/model picker. The parsed
    CLI args are passed through so flags (e.g. Nous login overrides) reach
    the provider-specific flows.
    """
    _require_tty("model")
    select_provider_and_model(args=args)
def select_provider_and_model(args=None):
def select_provider_and_model():
"""Core provider selection + model picking logic.
Shared by ``cmd_model`` (``hermes model``) and the setup wizard
@@ -1006,7 +1006,7 @@ def select_provider_and_model(args=None):
if selected_provider == "openrouter":
_model_flow_openrouter(config, current_model)
elif selected_provider == "nous":
_model_flow_nous(config, current_model, args=args)
_model_flow_nous(config, current_model)
elif selected_provider == "openai-codex":
_model_flow_openai_codex(config, current_model)
elif selected_provider == "copilot-acp":
@@ -1112,7 +1112,7 @@ def _model_flow_openrouter(config, current_model=""):
print("No change.")
def _model_flow_nous(config, current_model="", args=None):
def _model_flow_nous(config, current_model=""):
"""Nous Portal provider: ensure logged in, then pick model."""
from hermes_cli.auth import (
get_provider_auth_state, _prompt_model_selection, _save_model_choice,
@@ -1120,11 +1120,7 @@ def _model_flow_nous(config, current_model="", args=None):
fetch_nous_models, AuthError, format_auth_error,
_login_nous, PROVIDER_REGISTRY,
)
from hermes_cli.config import get_env_value, save_config, save_env_value
from hermes_cli.nous_subscription import (
apply_nous_provider_defaults,
get_nous_subscription_explainer_lines,
)
from hermes_cli.config import get_env_value, save_env_value
import argparse
state = get_provider_auth_state("nous")
@@ -1133,19 +1129,11 @@ def _model_flow_nous(config, current_model="", args=None):
print()
try:
mock_args = argparse.Namespace(
portal_url=getattr(args, "portal_url", None),
inference_url=getattr(args, "inference_url", None),
client_id=getattr(args, "client_id", None),
scope=getattr(args, "scope", None),
no_browser=bool(getattr(args, "no_browser", False)),
timeout=getattr(args, "timeout", None) or 15.0,
ca_bundle=getattr(args, "ca_bundle", None),
insecure=bool(getattr(args, "insecure", False)),
portal_url=None, inference_url=None, client_id=None,
scope=None, no_browser=False, timeout=15.0,
ca_bundle=None, insecure=False,
)
_login_nous(mock_args, PROVIDER_REGISTRY["nous"])
print()
for line in get_nous_subscription_explainer_lines():
print(line)
except SystemExit:
print("Login cancelled or failed.")
return
@@ -1194,36 +1182,7 @@ def _model_flow_nous(config, current_model="", args=None):
# Reactivate Nous as the provider and update config
inference_url = creds.get("base_url", "")
_update_config_for_provider("nous", inference_url)
current_model_cfg = config.get("model")
if isinstance(current_model_cfg, dict):
model_cfg = dict(current_model_cfg)
elif isinstance(current_model_cfg, str) and current_model_cfg.strip():
model_cfg = {"default": current_model_cfg.strip()}
else:
model_cfg = {}
model_cfg["provider"] = "nous"
model_cfg["default"] = selected
if inference_url and inference_url.strip():
model_cfg["base_url"] = inference_url.rstrip("/")
else:
model_cfg.pop("base_url", None)
config["model"] = model_cfg
# Clear any custom endpoint that might conflict
if get_env_value("OPENAI_BASE_URL"):
save_env_value("OPENAI_BASE_URL", "")
save_env_value("OPENAI_API_KEY", "")
changed_defaults = apply_nous_provider_defaults(config)
save_config(config)
print(f"Default model set to: {selected} (via Nous Portal)")
if "tts" in changed_defaults:
print("TTS provider set to: OpenAI TTS via your Nous subscription")
else:
current_tts = str(config.get("tts", {}).get("provider") or "edge")
if current_tts.lower() not in {"", "edge"}:
print(f"Keeping your existing TTS provider: {current_tts}")
print()
for line in get_nous_subscription_explainer_lines():
print(line)
else:
print("No change.")
@@ -1645,8 +1604,81 @@ def _model_flow_named_custom(config, provider_info):
print(f" Provider: {name} ({base_url})")
# Curated model lists for direct API-key providers — single source in models.py
from hermes_cli.models import _PROVIDER_MODELS
# Curated model lists for direct API-key providers
# Curated model lists for direct API-key providers, keyed by provider id.
# Each value is the ordered list of model IDs offered in the picker for
# that provider; users can still enter a custom model name manually.
_PROVIDER_MODELS = {
    "copilot-acp": [
        "copilot-acp",
    ],
    "copilot": [
        "gpt-5.4",
        "gpt-5.4-mini",
        "gpt-5-mini",
        "gpt-5.3-codex",
        "gpt-5.2-codex",
        "gpt-4.1",
        "gpt-4o",
        "gpt-4o-mini",
        "claude-opus-4.6",
        "claude-sonnet-4.6",
        "claude-sonnet-4.5",
        "claude-haiku-4.5",
        "gemini-2.5-pro",
        "grok-code-fast-1",
    ],
    "zai": [
        "glm-5",
        "glm-4.7",
        "glm-4.5",
        "glm-4.5-flash",
    ],
    "kimi-coding": [
        "kimi-for-coding",
        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2-thinking-turbo",
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
    "moonshot": [
        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
    "minimax": [
        "MiniMax-M2.7",
        "MiniMax-M2.7-highspeed",
        "MiniMax-M2.5",
        "MiniMax-M2.5-highspeed",
        "MiniMax-M2.1",
    ],
    # Same catalog as "minimax" — presumably the China-region endpoint
    # exposes the same model IDs (TODO confirm against the provider docs).
    "minimax-cn": [
        "MiniMax-M2.7",
        "MiniMax-M2.7-highspeed",
        "MiniMax-M2.5",
        "MiniMax-M2.5-highspeed",
        "MiniMax-M2.1",
    ],
    "kilocode": [
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
        "openai/gpt-5.4",
        "google/gemini-3-pro-preview",
        "google/gemini-3-flash-preview",
    ],
    # Curated HF model list — only agentic models that map to OpenRouter defaults.
    # Format: HF model ID → OpenRouter equivalent noted in comment
    "huggingface": [
        "Qwen/Qwen3.5-397B-A17B",  # ↔ qwen/qwen3.5-plus
        "Qwen/Qwen3.5-35B-A3B",  # ↔ qwen/qwen3.5-35b-a3b
        "deepseek-ai/DeepSeek-V3.2",  # ↔ deepseek/deepseek-chat
        "moonshotai/Kimi-K2.5",  # ↔ moonshotai/kimi-k2.5
        "MiniMaxAI/MiniMax-M2.5",  # ↔ minimax/minimax-m2.5
        "zai-org/GLM-5",  # ↔ z-ai/glm-5
        "XiaomiMiMo/MiMo-V2-Flash",  # ↔ xiaomi/mimo-v2-pro
        "moonshotai/Kimi-K2-Thinking",  # ↔ moonshotai/kimi-k2-thinking
    ],
}
def _current_reasoning_effort(config) -> str:
@@ -2115,13 +2147,12 @@ def _model_flow_kimi(config, current_model=""):
def _model_flow_api_key_provider(config, provider_id, current_model=""):
"""Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.)."""
"""Generic flow for API-key providers (z.ai, MiniMax)."""
from hermes_cli.auth import (
PROVIDER_REGISTRY, _prompt_model_selection, _save_model_choice,
deactivate_provider,
)
from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
from hermes_cli.models import fetch_api_models, opencode_model_api_mode, normalize_opencode_model_id
pconfig = PROVIDER_REGISTRY[provider_id]
key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
@@ -2175,6 +2206,7 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
# Curated list is substantial — use it directly, skip live probe
live_models = None
else:
from hermes_cli.models import fetch_api_models
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
live_models = fetch_api_models(api_key_for_probe, effective_base)
@@ -2187,11 +2219,6 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
# else: no defaults either, will fall through to raw input
if provider_id in {"opencode-zen", "opencode-go"}:
model_list = [normalize_opencode_model_id(provider_id, mid) for mid in model_list]
current_model = normalize_opencode_model_id(provider_id, current_model)
model_list = list(dict.fromkeys(mid for mid in model_list if mid))
if model_list:
selected = _prompt_model_selection(model_list, current_model=current_model)
else:
@@ -2201,12 +2228,9 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
selected = None
if selected:
if provider_id in {"opencode-zen", "opencode-go"}:
selected = normalize_opencode_model_id(provider_id, selected)
_save_model_choice(selected)
# Update config with provider, base URL, and provider-specific API mode
# Update config with provider and base URL
cfg = load_config()
model = cfg.get("model")
if not isinstance(model, dict):
@@ -2214,10 +2238,7 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
cfg["model"] = model
model["provider"] = provider_id
model["base_url"] = effective_base
if provider_id in {"opencode-zen", "opencode-go"}:
model["api_mode"] = opencode_model_api_mode(provider_id, selected)
else:
model.pop("api_mode", None)
model.pop("api_mode", None) # let runtime auto-detect from URL
save_config(cfg)
deactivate_provider()
@@ -2624,15 +2645,24 @@ def _update_via_zip(args):
if removed:
print(f" ✓ Cleared {removed} stale __pycache__ director{'y' if removed == 1 else 'ies'}")
# Reinstall Python dependencies. Prefer .[all], but if one optional extra
# breaks on this machine, keep base deps and reinstall the remaining extras
# individually so update does not silently strip working capabilities.
# Reinstall Python dependencies (try .[all] first for optional extras,
# fall back to . if extras fail — mirrors the install script behavior)
print("→ Updating Python dependencies...")
import subprocess
uv_bin = shutil.which("uv")
if uv_bin:
uv_env = {**os.environ, "VIRTUAL_ENV": str(PROJECT_ROOT / "venv")}
_install_python_dependencies_with_optional_fallback([uv_bin, "pip"], env=uv_env)
try:
subprocess.run(
[uv_bin, "pip", "install", "-e", ".[all]", "--quiet"],
cwd=PROJECT_ROOT, check=True, env=uv_env,
)
except subprocess.CalledProcessError:
print(" ⚠ Optional extras failed, installing base dependencies...")
subprocess.run(
[uv_bin, "pip", "install", "-e", ".", "--quiet"],
cwd=PROJECT_ROOT, check=True, env=uv_env,
)
else:
# Use sys.executable to explicitly call the venv's pip module,
# avoiding PEP 668 'externally-managed-environment' errors on Debian/Ubuntu.
@@ -2647,7 +2677,11 @@ def _update_via_zip(args):
cwd=PROJECT_ROOT,
check=True,
)
_install_python_dependencies_with_optional_fallback(pip_cmd)
try:
subprocess.run(pip_cmd + ["install", "-e", ".[all]", "--quiet"], cwd=PROJECT_ROOT, check=True)
except subprocess.CalledProcessError:
print(" ⚠ Optional extras failed, installing base dependencies...")
subprocess.run(pip_cmd + ["install", "-e", ".", "--quiet"], cwd=PROJECT_ROOT, check=True)
# Sync skills
try:
@@ -2836,107 +2870,16 @@ def _restore_stashed_changes(
return True
def _invalidate_update_cache():
    """Delete the update-check cache for ALL profiles so no banner
    reports a stale "commits behind" count after a successful update.

    The git repo is shared across profiles — when one profile runs
    ``hermes update``, every profile is now current.
    """
    default_home = Path.home() / ".hermes"
    # Start with the default profile home, then add every named profile
    # directory under ~/.hermes/profiles/.
    profile_homes = [default_home]
    profiles_root = default_home / "profiles"
    if profiles_root.is_dir():
        profile_homes.extend(
            child for child in profiles_root.iterdir() if child.is_dir()
        )
    for profile_home in profile_homes:
        # Best effort: a missing or undeletable cache file is not an error.
        try:
            stale_cache = profile_home / ".update_check"
            if stale_cache.exists():
                stale_cache.unlink()
        except Exception:
            pass
def _load_installable_optional_extras() -> list[str]:
    """Return the optional extras referenced by the ``all`` group.

    Only extras that ``[all]`` actually pulls in are retried individually.
    Extras outside ``[all]`` (e.g. ``rl``, ``yc-bench``) are intentionally
    excluded — they have heavy or platform-specific deps that most users
    never installed.

    Returns:
        Extra names from ``[project.optional-dependencies]`` that the
        ``all`` group references, or ``[]`` if pyproject.toml is missing
        or unparseable.
    """
    try:
        import tomllib
        with (PROJECT_ROOT / "pyproject.toml").open("rb") as handle:
            project = tomllib.load(handle).get("project", {})
    except Exception:
        return []
    optional_deps = project.get("optional-dependencies", {})
    if not isinstance(optional_deps, dict):
        return []
    # Parse the [all] group to find which extras it references.
    # Entries look like "hermes-agent[matrix]" or "package-name[extra]".
    all_refs = optional_deps.get("all", [])
    referenced: list[str] = []
    for ref in all_refs:
        if "[" in ref and "]" in ref:
            name = ref.split("[", 1)[1].split("]", 1)[0]
            # Only keep names that are real extras declared in pyproject.
            if name in optional_deps:
                referenced.append(name)
    return referenced
def _install_python_dependencies_with_optional_fallback(
install_cmd_prefix: list[str],
*,
env: dict[str, str] | None = None,
) -> None:
"""Install base deps plus as many optional extras as the environment supports."""
try:
subprocess.run(
install_cmd_prefix + ["install", "-e", ".[all]", "--quiet"],
cwd=PROJECT_ROOT,
check=True,
env=env,
)
return
except subprocess.CalledProcessError:
print(" ⚠ Optional extras failed, reinstalling base dependencies and retrying extras individually...")
subprocess.run(
install_cmd_prefix + ["install", "-e", ".", "--quiet"],
cwd=PROJECT_ROOT,
check=True,
env=env,
)
failed_extras: list[str] = []
installed_extras: list[str] = []
for extra in _load_installable_optional_extras():
try:
subprocess.run(
install_cmd_prefix + ["install", "-e", f".[{extra}]", "--quiet"],
cwd=PROJECT_ROOT,
check=True,
env=env,
)
installed_extras.append(extra)
except subprocess.CalledProcessError:
failed_extras.append(extra)
if installed_extras:
print(f" ✓ Reinstalled optional extras individually: {', '.join(installed_extras)}")
if failed_extras:
print(f" ⚠ Skipped optional extras that still failed: {', '.join(failed_extras)}")
pass
def cmd_update(args):
"""Update Hermes Agent to the latest version."""
@@ -3112,14 +3055,23 @@ def cmd_update(args):
if removed:
print(f" ✓ Cleared {removed} stale __pycache__ director{'y' if removed == 1 else 'ies'}")
# Reinstall Python dependencies. Prefer .[all], but if one optional extra
# breaks on this machine, keep base deps and reinstall the remaining extras
# individually so update does not silently strip working capabilities.
# Reinstall Python dependencies (try .[all] first for optional extras,
# fall back to . if extras fail — mirrors the install script behavior)
print("→ Updating Python dependencies...")
uv_bin = shutil.which("uv")
if uv_bin:
uv_env = {**os.environ, "VIRTUAL_ENV": str(PROJECT_ROOT / "venv")}
_install_python_dependencies_with_optional_fallback([uv_bin, "pip"], env=uv_env)
try:
subprocess.run(
[uv_bin, "pip", "install", "-e", ".[all]", "--quiet"],
cwd=PROJECT_ROOT, check=True, env=uv_env,
)
except subprocess.CalledProcessError:
print(" ⚠ Optional extras failed, installing base dependencies...")
subprocess.run(
[uv_bin, "pip", "install", "-e", ".", "--quiet"],
cwd=PROJECT_ROOT, check=True, env=uv_env,
)
else:
# Use sys.executable to explicitly call the venv's pip module,
# avoiding PEP 668 'externally-managed-environment' errors on Debian/Ubuntu.
@@ -3134,7 +3086,11 @@ def cmd_update(args):
cwd=PROJECT_ROOT,
check=True,
)
_install_python_dependencies_with_optional_fallback(pip_cmd)
try:
subprocess.run(pip_cmd + ["install", "-e", ".[all]", "--quiet"], cwd=PROJECT_ROOT, check=True)
except subprocess.CalledProcessError:
print(" ⚠ Optional extras failed, installing base dependencies...")
subprocess.run(pip_cmd + ["install", "-e", ".", "--quiet"], cwd=PROJECT_ROOT, check=True)
# Check for Node.js deps
if (PROJECT_ROOT / "package.json").exists():
@@ -3204,15 +3160,6 @@ def cmd_update(args):
except Exception:
pass # profiles module not available or no profiles
# Sync Honcho host blocks to all profiles
try:
from plugins.memory.honcho.cli import sync_honcho_profiles_quiet
synced = sync_honcho_profiles_quiet()
if synced:
print(f"\n-> Honcho: synced {synced} profile(s)")
except Exception:
pass # honcho plugin not installed or not configured
# Check for config migrations
print()
print("→ Checking configuration for new options...")
@@ -3555,15 +3502,6 @@ def cmd_profile(args):
else:
print(f"Cloned config, .env, SOUL.md from {source_label}.")
# Auto-clone Honcho config for the new profile (only with --clone/--clone-all)
if clone or clone_all:
try:
from plugins.memory.honcho.cli import clone_honcho_for_profile
if clone_honcho_for_profile(name):
print(f"Honcho config cloned (peer: {name})")
except Exception:
pass # Honcho plugin not installed or not configured
# Seed bundled skills (skip if --clone-all already copied them)
if not clone_all:
result = seed_profile_skills(profile_dir)
@@ -3905,44 +3843,6 @@ For more help on a command:
help="Select default model and provider",
description="Interactively select your inference provider and default model"
)
model_parser.add_argument(
"--portal-url",
help="Portal base URL for Nous login (default: production portal)"
)
model_parser.add_argument(
"--inference-url",
help="Inference API base URL for Nous login (default: production inference API)"
)
model_parser.add_argument(
"--client-id",
default=None,
help="OAuth client id to use for Nous login (default: hermes-cli)"
)
model_parser.add_argument(
"--scope",
default=None,
help="OAuth scope to request for Nous login"
)
model_parser.add_argument(
"--no-browser",
action="store_true",
help="Do not attempt to open the browser automatically during Nous login"
)
model_parser.add_argument(
"--timeout",
type=float,
default=15.0,
help="HTTP request timeout in seconds for Nous login (default: 15)"
)
model_parser.add_argument(
"--ca-bundle",
help="Path to CA bundle PEM file for Nous TLS verification"
)
model_parser.add_argument(
"--insecure",
action="store_true",
help="Disable TLS verification for Nous login (testing only)"
)
model_parser.set_defaults(func=cmd_model)
# =========================================================================
@@ -4450,30 +4350,27 @@ For more help on a command:
plugins_parser.set_defaults(func=cmd_plugins)
# =========================================================================
# honcho command — Honcho-specific config (peer, mode, tokens, profiles)
# Provider selection happens via 'hermes memory setup'.
# honcho command
# =========================================================================
honcho_parser = subparsers.add_parser(
"honcho",
help="Manage Honcho memory provider config (peer, mode, profiles)",
help="Manage Honcho AI memory integration",
description=(
"Configure Honcho-specific settings. Honcho is now a memory provider\n"
"plugin — initial setup is via 'hermes memory setup'. These commands\n"
"manage Honcho's own config: peer names, memory mode, token budgets,\n"
"per-profile host blocks, and cross-profile observability."
"Honcho is a memory layer that persists across sessions.\n\n"
"Each conversation is stored as a peer interaction in a workspace. "
"Honcho builds a representation of the user over time — conclusions, "
"patterns, context — and surfaces the relevant slice at the start of "
"each turn so Hermes knows who you are without you having to repeat yourself.\n\n"
"Modes: hybrid (Honcho + local MEMORY.md), honcho (Honcho only), "
"local (MEMORY.md only). Write frequency is configurable so memory "
"writes never block the response."
),
formatter_class=__import__("argparse").RawDescriptionHelpFormatter,
)
honcho_parser.add_argument(
"--target-profile", metavar="NAME", dest="target_profile",
help="Target a specific profile's Honcho config without switching",
)
honcho_subparsers = honcho_parser.add_subparsers(dest="honcho_command")
honcho_subparsers.add_parser("setup", help="Initial Honcho setup (redirects to hermes memory setup)")
honcho_status = honcho_subparsers.add_parser("status", help="Show current Honcho config and connection status")
honcho_status.add_argument("--all", action="store_true", help="Show config overview across all profiles")
honcho_subparsers.add_parser("peers", help="Show peer identities across all profiles")
honcho_subparsers.add_parser("setup", help="Interactive setup wizard for Honcho integration")
honcho_subparsers.add_parser("status", help="Show current Honcho config and connection status")
honcho_subparsers.add_parser("sessions", help="List known Honcho session mappings")
honcho_map = honcho_subparsers.add_parser(
@@ -4533,60 +4430,13 @@ For more help on a command:
"migrate",
help="Step-by-step migration guide from openclaw-honcho to Hermes Honcho",
)
honcho_subparsers.add_parser("enable", help="Enable Honcho for the active profile")
honcho_subparsers.add_parser("disable", help="Disable Honcho for the active profile")
honcho_subparsers.add_parser("sync", help="Sync Honcho config to all existing profiles")
def cmd_honcho(args):
sub = getattr(args, "honcho_command", None)
if sub == "setup":
# Redirect to the generic memory setup
print("\n Honcho is now configured via the memory provider system.")
print(" Running 'hermes memory setup'...\n")
from hermes_cli.memory_setup import memory_command
memory_command(args)
return
from plugins.memory.honcho.cli import honcho_command
from honcho_integration.cli import honcho_command
honcho_command(args)
honcho_parser.set_defaults(func=cmd_honcho)
# =========================================================================
# memory command
# =========================================================================
memory_parser = subparsers.add_parser(
"memory",
help="Configure external memory provider",
description=(
"Set up and manage external memory provider plugins.\n\n"
"Available providers: honcho, openviking, mem0, hindsight,\n"
"holographic, retaindb, byterover.\n\n"
"Only one external provider can be active at a time.\n"
"Built-in memory (MEMORY.md/USER.md) is always active."
),
)
memory_sub = memory_parser.add_subparsers(dest="memory_command")
memory_sub.add_parser("setup", help="Interactive provider selection and configuration")
memory_sub.add_parser("status", help="Show current memory provider config")
memory_off_p = memory_sub.add_parser("off", help="Disable external provider (built-in only)")
def cmd_memory(args):
    """Handle the `hermes memory` CLI command.

    `hermes memory off` clears the external provider key in config.yaml
    (built-in MEMORY.md/USER.md memory stays active); every other
    subcommand (setup/status/none) is delegated to memory_command().
    """
    sub = getattr(args, "memory_command", None)
    if sub == "off":
        from hermes_cli.config import load_config, save_config
        config = load_config()
        # Guard against a scalar/absent "memory" section before writing into it.
        if not isinstance(config.get("memory"), dict):
            config["memory"] = {}
        config["memory"]["provider"] = ""
        save_config(config)
        print("\n ✓ Memory provider: built-in only")
        print(" Saved to config.yaml\n")
    else:
        from hermes_cli.memory_setup import memory_command
        memory_command(args)
# =========================================================================
# tools command
# =========================================================================

View File

@@ -1,451 +0,0 @@
"""hermes memory setup|status — configure memory provider plugins.
Auto-detects installed memory providers via the plugin system.
Interactive curses-based UI for provider selection, then walks through
the provider's config schema. Writes config to config.yaml + .env.
"""
from __future__ import annotations
import getpass
import os
import sys
from pathlib import Path
# ---------------------------------------------------------------------------
# Curses-based interactive picker (same pattern as hermes tools)
# ---------------------------------------------------------------------------
def _curses_select(title: str, items: list[tuple[str, str]], default: int = 0) -> int:
    """Interactive single-select with arrow keys.
    items: list of (label, description) tuples.
    Returns selected index, or default on escape/quit.
    """
    try:
        import curses
        # Mutable cell so the nested curses callback can report the choice out.
        result = [default]
        def _menu(stdscr):
            curses.curs_set(0)
            if curses.has_colors():
                curses.start_color()
                curses.use_default_colors()
                curses.init_pair(1, curses.COLOR_GREEN, -1)
                curses.init_pair(2, curses.COLOR_YELLOW, -1)
                curses.init_pair(3, curses.COLOR_CYAN, -1)
            cursor = default
            while True:
                stdscr.clear()
                max_y, max_x = stdscr.getmaxyx()
                # Title
                try:
                    stdscr.addnstr(0, 0, title, max_x - 1,
                        curses.A_BOLD | (curses.color_pair(2) if curses.has_colors() else 0))
                    stdscr.addnstr(1, 0, " ↑↓ navigate ⏎ select q quit", max_x - 1,
                        curses.color_pair(3) if curses.has_colors() else curses.A_DIM)
                except curses.error:
                    pass
                for i, (label, desc) in enumerate(items):
                    y = i + 3
                    # Stop drawing items that would fall off the bottom row.
                    if y >= max_y - 1:
                        break
                    # NOTE(review): this empty string looks like a lost Unicode
                    # pointer glyph (extraction mojibake) — confirm against VCS.
                    arrow = "" if i == cursor else " "
                    line = f" {arrow} {label}"
                    if desc:
                        line += f" {desc}"
                    attr = curses.A_NORMAL
                    if i == cursor:
                        attr = curses.A_BOLD
                        if curses.has_colors():
                            attr |= curses.color_pair(1)
                    try:
                        # addnstr raises curses.error at screen edges; ignore.
                        stdscr.addnstr(y, 0, line[:max_x - 1], max_x - 1, attr)
                    except curses.error:
                        pass
                stdscr.refresh()
                key = stdscr.getch()
                # vim-style j/k work alongside the arrow keys; wraps around.
                if key in (curses.KEY_UP, ord('k')):
                    cursor = (cursor - 1) % len(items)
                elif key in (curses.KEY_DOWN, ord('j')):
                    cursor = (cursor + 1) % len(items)
                elif key in (curses.KEY_ENTER, 10, 13):
                    result[0] = cursor
                    return
                elif key in (27, ord('q')):
                    # Esc/q: leave result[0] at the default.
                    return
        curses.wrapper(_menu)
        return result[0]
    except Exception:
        # Fallback: numbered input
        print(f"\n {title}\n")
        for i, (label, desc) in enumerate(items):
            # NOTE(review): empty marker string is likely mojibake (see above).
            marker = "" if i == default else " "
            d = f" {desc}" if desc else ""
            print(f" {marker} {i + 1}. {label}{d}")
        while True:
            try:
                val = input(f"\n Select [1-{len(items)}] ({default + 1}): ")
                if not val:
                    return default
                idx = int(val) - 1
                if 0 <= idx < len(items):
                    return idx
            except (ValueError, EOFError):
                return default
def _prompt(label: str, default: str | None = None, secret: bool = False) -> str:
    """Ask the user for a value on stdin; mask input for secrets.

    Shows ``[default]`` next to the label when a default is given, and
    returns that default when the user submits a blank answer.
    """
    hint = f" [{default}]" if default else ""
    # Emit the prompt ourselves so getpass can run with an empty prompt,
    # keeping the display identical in the secret and non-secret cases.
    sys.stdout.write(f" {label}{hint}: ")
    sys.stdout.flush()
    if secret and sys.stdin.isatty():
        answer = getpass.getpass(prompt="")
    else:
        # Non-interactive stdin (pipes) is read directly even for secrets.
        answer = sys.stdin.readline().strip()
    return answer or (default or "")
# ---------------------------------------------------------------------------
# Provider discovery
# ---------------------------------------------------------------------------
def _install_dependencies(provider_name: str) -> None:
"""Install pip dependencies declared in plugin.yaml."""
import subprocess
from pathlib import Path as _Path
plugin_dir = _Path(__file__).parent.parent / "plugins" / "memory" / provider_name
yaml_path = plugin_dir / "plugin.yaml"
if not yaml_path.exists():
return
try:
import yaml
with open(yaml_path) as f:
meta = yaml.safe_load(f) or {}
except Exception:
return
pip_deps = meta.get("pip_dependencies", [])
if not pip_deps:
return
# pip name → import name mapping for packages where they differ
_IMPORT_NAMES = {
"honcho-ai": "honcho",
"mem0ai": "mem0",
"hindsight-client": "hindsight_client",
}
# Check which packages are missing
missing = []
for dep in pip_deps:
import_name = _IMPORT_NAMES.get(dep, dep.replace("-", "_").split("[")[0])
try:
__import__(import_name)
except ImportError:
missing.append(dep)
if not missing:
return
print(f"\n Installing dependencies: {', '.join(missing)}")
try:
subprocess.run(
[sys.executable, "-m", "pip", "install", "--quiet"] + missing,
check=True, timeout=120,
capture_output=True,
)
print(f" ✓ Installed {', '.join(missing)}")
except subprocess.CalledProcessError as e:
print(f" ⚠ Failed to install {', '.join(missing)}")
stderr = (e.stderr or b"").decode()[:200]
if stderr:
print(f" {stderr}")
print(f" Run manually: pip install {' '.join(missing)}")
except Exception as e:
print(f" ⚠ Install failed: {e}")
print(f" Run manually: pip install {' '.join(missing)}")
# Also show external dependencies (non-pip) if any
ext_deps = meta.get("external_dependencies", [])
for dep in ext_deps:
dep_name = dep.get("name", "")
check_cmd = dep.get("check", "")
install_cmd = dep.get("install", "")
if check_cmd:
try:
subprocess.run(
check_cmd, shell=True, capture_output=True, timeout=5
)
except Exception:
if install_cmd:
print(f"\n'{dep_name}' not found. Install with:")
print(f" {install_cmd}")
def _get_available_providers() -> list:
"""Discover memory providers from plugins/memory/.
Returns list of (name, description, provider_instance) tuples.
"""
try:
from plugins.memory import discover_memory_providers, load_memory_provider
raw = discover_memory_providers()
except Exception:
raw = []
results = []
for name, desc, available in raw:
try:
provider = load_memory_provider(name)
if not provider:
continue
except Exception:
continue
# Override description with setup hint
schema = provider.get_config_schema() if hasattr(provider, "get_config_schema") else []
has_secrets = any(f.get("secret") for f in schema)
if has_secrets:
setup_hint = "requires API key"
elif not schema:
setup_hint = "no setup needed"
else:
setup_hint = "local"
results.append((name, setup_hint, provider))
return results
# ---------------------------------------------------------------------------
# Setup wizard
# ---------------------------------------------------------------------------
def cmd_setup(args) -> None:
    """Interactive memory provider setup wizard.

    Flow: pick a provider (or built-in only) via the curses picker, install
    the provider's pip deps, walk its config schema (choice / secret / text
    fields), then persist: activation key to config.yaml, non-secret config
    via the provider's own save_config, and secrets to ~/.hermes/.env.
    """
    from hermes_cli.config import load_config, save_config
    providers = _get_available_providers()
    if not providers:
        print("\n No memory provider plugins detected.")
        print(" Install a plugin to ~/.hermes/plugins/ and try again.\n")
        return
    # Build picker items
    items = []
    for name, desc, _ in providers:
        items.append((name, f"{desc}"))
    # "Built-in only" is appended last and used as the picker default.
    items.append(("Built-in only", "— MEMORY.md / USER.md (default)"))
    builtin_idx = len(items) - 1
    selected = _curses_select("Memory provider setup", items, default=builtin_idx)
    config = load_config()
    if not isinstance(config.get("memory"), dict):
        config["memory"] = {}
    # Built-in only
    if selected >= len(providers) or selected < 0:
        config["memory"]["provider"] = ""
        save_config(config)
        print("\n ✓ Memory provider: built-in only")
        print(" Saved to config.yaml\n")
        return
    name, _, provider = providers[selected]
    # Install pip dependencies if declared in plugin.yaml
    _install_dependencies(name)
    schema = provider.get_config_schema() if hasattr(provider, "get_config_schema") else []
    # Provider config section
    provider_config = config["memory"].get(name, {})
    if not isinstance(provider_config, dict):
        provider_config = {}
    env_path = Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))) / ".env"
    # Secrets are collected here and written to .env, never to config.yaml.
    env_writes = {}
    if schema:
        print(f"\n Configuring {name}:\n")
    for field in schema:
        key = field["key"]
        desc = field.get("description", key)
        default = field.get("default")
        is_secret = field.get("secret", False)
        choices = field.get("choices")
        env_var = field.get("env_var")
        url = field.get("url")
        if choices and not is_secret:
            # Use curses picker for choice fields
            choice_items = [(c, "") for c in choices]
            # Pre-select the currently configured value when it is a valid choice.
            current = provider_config.get(key, default)
            current_idx = 0
            if current and current in choices:
                current_idx = choices.index(current)
            sel = _curses_select(f" {desc}", choice_items, default=current_idx)
            provider_config[key] = choices[sel]
        elif is_secret:
            # Prompt for secret
            existing = os.environ.get(env_var, "") if env_var else ""
            if existing:
                # Show only the key's tail; blank answer keeps the current value.
                masked = f"...{existing[-4:]}" if len(existing) > 4 else "set"
                val = _prompt(f"{desc} (current: {masked}, blank to keep)", secret=True)
            else:
                hint = f" Get yours at {url}" if url else ""
                if hint:
                    print(hint)
                val = _prompt(desc, secret=True)
            if val and env_var:
                env_writes[env_var] = val
        else:
            # Regular text prompt
            current = provider_config.get(key)
            effective_default = current or default
            val = _prompt(desc, default=str(effective_default) if effective_default else None)
            if val:
                provider_config[key] = val
    # Write activation key to config.yaml
    config["memory"]["provider"] = name
    save_config(config)
    # Write non-secret config to provider's native location
    hermes_home = str(Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))))
    if provider_config and hasattr(provider, "save_config"):
        try:
            provider.save_config(provider_config, hermes_home)
        except Exception as e:
            print(f" ⚠ Failed to write provider config: {e}")
    # Write secrets to .env
    if env_writes:
        _write_env_vars(env_path, env_writes)
    print(f"\n ✓ Memory provider: {name}")
    print(f" ✓ Activation saved to config.yaml")
    if provider_config:
        print(f" ✓ Provider config saved")
    if env_writes:
        print(f" ✓ API keys saved to .env")
    print(f"\n Start a new session to activate.\n")
def _write_env_vars(env_path: Path, env_writes: dict) -> None:
"""Append or update env vars in .env file."""
env_path.parent.mkdir(parents=True, exist_ok=True)
existing_lines = []
if env_path.exists():
existing_lines = env_path.read_text().splitlines()
updated_keys = set()
new_lines = []
for line in existing_lines:
key_match = line.split("=", 1)[0].strip() if "=" in line else ""
if key_match in env_writes:
new_lines.append(f"{key_match}={env_writes[key_match]}")
updated_keys.add(key_match)
else:
new_lines.append(line)
for key, val in env_writes.items():
if key not in updated_keys:
new_lines.append(f"{key}={val}")
env_path.write_text("\n".join(new_lines) + "\n")
# ---------------------------------------------------------------------------
# Status
# ---------------------------------------------------------------------------
def cmd_status(args) -> None:
    """Show current memory provider config.

    Prints the active provider (or built-in only), its stored config, whether
    the plugin is installed/available, which secret env vars are missing, and
    finally a list of all installed provider plugins.
    """
    from hermes_cli.config import load_config
    config = load_config()
    mem_config = config.get("memory", {})
    provider_name = mem_config.get("provider", "")
    # NOTE(review): the '""' repeated 40x looks like a lost box-drawing rule
    # character (extraction mojibake) — confirm against VCS history.
    print(f"\nMemory status\n" + "" * 40)
    print(f" Built-in: always active")
    print(f" Provider: {provider_name or '(none — built-in only)'}")
    if provider_name:
        provider_config = mem_config.get(provider_name, {})
        if provider_config:
            print(f"\n {provider_name} config:")
            for key, val in provider_config.items():
                print(f" {key}: {val}")
        providers = _get_available_providers()
        found = any(name == provider_name for name, _, _ in providers)
        if found:
            print(f"\n Plugin: installed ✓")
            for pname, _, p in providers:
                if pname == provider_name:
                    if p.is_available():
                        print(f" Status: available ✓")
                    else:
                        print(f" Status: not available ✗")
                        # List the secret fields whose env vars are unset —
                        # the usual reason a provider reports unavailable.
                        schema = p.get_config_schema() if hasattr(p, "get_config_schema") else []
                        secrets = [f for f in schema if f.get("secret")]
                        if secrets:
                            print(f" Missing:")
                            for s in secrets:
                                env_var = s.get("env_var", "")
                                url = s.get("url", "")
                                is_set = bool(os.environ.get(env_var))
                                # NOTE(review): both marks are empty strings —
                                # presumably ✓/✗ glyphs lost to mojibake.
                                mark = "" if is_set else ""
                                line = f" {mark} {env_var}"
                                if url and not is_set:
                                    line += f"{url}"
                                print(line)
                    break
        else:
            print(f"\n Plugin: NOT installed ✗")
            print(f" Install the '{provider_name}' memory plugin to ~/.hermes/plugins/")
    providers = _get_available_providers()
    if providers:
        print(f"\n Installed plugins:")
        for pname, desc, _ in providers:
            active = " ← active" if pname == provider_name else ""
            print(f"{pname} ({desc}){active}")
    print()
# ---------------------------------------------------------------------------
# Router
# ---------------------------------------------------------------------------
def memory_command(args) -> None:
    """Dispatch `hermes memory` subcommands to their handlers."""
    # Unknown or missing subcommands fall back to the status view.
    handlers = {"setup": cmd_setup, "status": cmd_status}
    subcommand = getattr(args, "memory_command", None)
    handlers.get(subcommand, cmd_status)(args)

View File

@@ -26,7 +26,6 @@ class ModelSwitchResult:
provider_changed: bool = False
api_key: str = ""
base_url: str = ""
api_mode: str = ""
persist: bool = False
error_message: str = ""
warning_message: str = ""
@@ -74,7 +73,6 @@ def switch_model(
detect_provider_for_model,
validate_requested_model,
_PROVIDER_LABELS,
opencode_model_api_mode,
)
from hermes_cli.runtime_provider import resolve_runtime_provider
@@ -100,13 +98,11 @@ def switch_model(
# Step 4: Resolve credentials for target provider
api_key = current_api_key
base_url = current_base_url
api_mode = ""
if provider_changed:
try:
runtime = resolve_runtime_provider(requested=target_provider)
api_key = runtime.get("api_key", "")
base_url = runtime.get("base_url", "")
api_mode = runtime.get("api_mode", "")
except Exception as e:
provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
if target_provider == "custom":
@@ -134,7 +130,6 @@ def switch_model(
runtime = resolve_runtime_provider(requested=current_provider)
api_key = runtime.get("api_key", "")
base_url = runtime.get("base_url", "")
api_mode = runtime.get("api_mode", "")
except Exception:
pass
@@ -171,12 +166,6 @@ def switch_model(
and ("localhost" in (base_url or "") or "127.0.0.1" in (base_url or ""))
)
if target_provider in {"opencode-zen", "opencode-go"}:
# Recompute against the requested new model, not the currently-configured
# model used during runtime resolution. OpenCode mixes API surfaces by
# model family, so a same-provider model switch can change api_mode.
api_mode = opencode_model_api_mode(target_provider, new_model)
return ModelSwitchResult(
success=True,
new_model=new_model,
@@ -184,7 +173,6 @@ def switch_model(
provider_changed=provider_changed,
api_key=api_key,
base_url=base_url,
api_mode=api_mode,
persist=bool(validation.get("persist")),
warning_message=validation.get("message") or "",
is_custom_target=is_custom_target,

View File

@@ -125,12 +125,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"kimi-k2-turbo-preview",
"kimi-k2-0905-preview",
],
"moonshot": [
"kimi-k2.5",
"kimi-k2-thinking",
"kimi-k2-turbo-preview",
"kimi-k2-0905-preview",
],
"minimax": [
"MiniMax-M2.7",
"MiniMax-M2.7-highspeed",
@@ -954,53 +948,6 @@ def copilot_model_api_mode(
return "chat_completions"
def normalize_opencode_model_id(provider_id: Optional[str], model_id: Optional[str]) -> str:
    """Strip a redundant provider prefix from an OpenCode model id.

    Config files may store ids like ``opencode-zen/claude-...``; API requests
    want the bare slug. Ids for non-OpenCode providers pass through untouched.
    """
    slug = str(model_id or "").strip()
    resolved = normalize_provider(provider_id)
    if slug and resolved in {"opencode-zen", "opencode-go"}:
        marker = f"{resolved}/"
        # Prefix match is case-insensitive, but the returned slug keeps its case.
        if slug.lower().startswith(marker):
            slug = slug[len(marker):]
    return slug
def opencode_model_api_mode(provider_id: Optional[str], model_id: Optional[str]) -> str:
    """Determine the API mode for an OpenCode Zen / Go model.

    OpenCode routes model families to different API surfaces (per the
    published Zen/Go docs): GPT-5/Codex on Zen use ``/v1/responses``; Claude
    on Zen and MiniMax on Go use ``/v1/messages``; everything else (Gemini,
    GLM, Kimi, Qwen, ...) uses ``/v1/chat/completions``.
    """
    provider = normalize_provider(provider_id)
    slug = normalize_opencode_model_id(provider_id, model_id).lower()
    if not slug:
        return "chat_completions"
    if provider == "opencode-go" and slug.startswith("minimax-"):
        return "anthropic_messages"
    if provider == "opencode-zen":
        if slug.startswith("claude-"):
            return "anthropic_messages"
        if slug.startswith("gpt-"):
            return "codex_responses"
    # Default surface for all remaining families and providers.
    return "chat_completions"
def github_model_reasoning_efforts(
model_id: Optional[str],
*,

View File

@@ -1,517 +0,0 @@
"""Helpers for Nous subscription managed-tool capabilities."""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Iterable, Optional, Set
from hermes_cli.auth import get_nous_auth_status
from hermes_cli.config import get_env_value, load_config
from tools.managed_tool_gateway import is_managed_tool_gateway_ready
from tools.tool_backend_helpers import (
has_direct_modal_credentials,
managed_nous_tools_enabled,
normalize_browser_cloud_provider,
normalize_modal_mode,
resolve_modal_backend_state,
resolve_openai_audio_api_key,
)
# Fallback toolset name per platform, used when config declares none for it.
_DEFAULT_PLATFORM_TOOLSETS = {
    "cli": "hermes-cli",
}
@dataclass(frozen=True)
class NousFeatureState:
    """Immutable snapshot of one managed-tool capability's availability."""
    key: str                        # canonical feature key, e.g. "web", "tts"
    label: str                      # human-readable name for display
    included_by_default: bool       # part of the default subscription bundle
    available: bool                 # usable given current credentials/config
    active: bool                    # available AND the toolset is enabled
    managed_by_nous: bool           # served via the Nous managed gateway
    direct_override: bool           # user supplied direct credentials instead
    toolset_enabled: bool           # the matching toolset is enabled in config
    current_provider: str = ""      # backing provider id (browser/tts only)
    explicit_configured: bool = False  # provider was set explicitly in config
@dataclass(frozen=True)
class NousSubscriptionFeatures:
    """Aggregated Nous subscription state plus per-feature availability."""
    subscribed: bool          # provider is "nous" or Nous auth is present
    nous_auth_present: bool   # logged-in Nous credentials were detected
    provider_is_nous: bool    # configured model provider is "nous"
    features: Dict[str, NousFeatureState]  # keyed by feature name
    @property
    def web(self) -> NousFeatureState:
        """State of the web search/scrape feature."""
        return self.features["web"]
    @property
    def image_gen(self) -> NousFeatureState:
        """State of the image generation feature."""
        return self.features["image_gen"]
    @property
    def tts(self) -> NousFeatureState:
        """State of the text-to-speech feature."""
        return self.features["tts"]
    @property
    def browser(self) -> NousFeatureState:
        """State of the browser automation feature."""
        return self.features["browser"]
    @property
    def modal(self) -> NousFeatureState:
        """State of the Modal terminal backend feature."""
        return self.features["modal"]
    def items(self) -> Iterable[NousFeatureState]:
        """Yield the feature states in a stable display order."""
        ordered = ("web", "image_gen", "tts", "browser", "modal")
        for key in ordered:
            yield self.features[key]
def _model_config_dict(config: Dict[str, object]) -> Dict[str, object]:
model_cfg = config.get("model")
if isinstance(model_cfg, dict):
return dict(model_cfg)
if isinstance(model_cfg, str) and model_cfg.strip():
return {"default": model_cfg.strip()}
return {}
def _toolset_enabled(config: Dict[str, object], toolset_key: str) -> bool:
    """Return True if every tool in *toolset_key* is enabled on some platform.

    Resolves the target toolset, then for each configured platform resolves
    the platform's toolsets and checks whether the target's tools are a
    subset of them. A missing/invalid ``platform_toolsets`` section falls
    back to the default CLI toolset.
    """
    from toolsets import resolve_toolset
    platform_toolsets = config.get("platform_toolsets")
    if not isinstance(platform_toolsets, dict) or not platform_toolsets:
        platform_toolsets = {"cli": [_DEFAULT_PLATFORM_TOOLSETS["cli"]]}
    target_tools = set(resolve_toolset(toolset_key))
    # An unknown/empty toolset can never be "enabled".
    if not target_tools:
        return False
    for platform, raw_toolsets in platform_toolsets.items():
        if isinstance(raw_toolsets, list):
            toolset_names = list(raw_toolsets)
        else:
            # Non-list value: substitute the platform's default toolset, if any.
            default_toolset = _DEFAULT_PLATFORM_TOOLSETS.get(platform)
            toolset_names = [default_toolset] if default_toolset else []
        if not toolset_names:
            # Empty list also falls back to the platform default.
            default_toolset = _DEFAULT_PLATFORM_TOOLSETS.get(platform)
            if default_toolset:
                toolset_names = [default_toolset]
        available_tools: Set[str] = set()
        for toolset_name in toolset_names:
            if not isinstance(toolset_name, str) or not toolset_name:
                continue
            try:
                available_tools.update(resolve_toolset(toolset_name))
            except Exception:
                # Unresolvable toolset names contribute nothing.
                continue
        if target_tools and target_tools.issubset(available_tools):
            return True
    return False
def _has_agent_browser() -> bool:
import shutil
agent_browser_bin = shutil.which("agent-browser")
local_bin = (
Path(__file__).parent.parent / "node_modules" / ".bin" / "agent-browser"
)
return bool(agent_browser_bin or local_bin.exists())
def _browser_label(current_provider: str) -> str:
mapping = {
"browserbase": "Browserbase",
"browser-use": "Browser Use",
"camofox": "Camofox",
"local": "Local browser",
}
return mapping.get(current_provider or "local", current_provider or "Local browser")
def _tts_label(current_provider: str) -> str:
mapping = {
"openai": "OpenAI TTS",
"elevenlabs": "ElevenLabs",
"edge": "Edge TTS",
"neutts": "NeuTTS",
}
return mapping.get(current_provider or "edge", current_provider or "Edge TTS")
def _resolve_browser_feature_state(
*,
browser_tool_enabled: bool,
browser_provider: str,
browser_provider_explicit: bool,
browser_local_available: bool,
direct_camofox: bool,
direct_browserbase: bool,
direct_browser_use: bool,
managed_browser_available: bool,
) -> tuple[str, bool, bool, bool]:
"""Resolve browser availability using the same precedence as runtime."""
if direct_camofox:
return "camofox", True, bool(browser_tool_enabled), False
if browser_provider_explicit:
current_provider = browser_provider or "local"
if current_provider == "browserbase":
provider_available = managed_browser_available or direct_browserbase
available = bool(browser_local_available and provider_available)
managed = bool(
browser_tool_enabled
and browser_local_available
and managed_browser_available
and not direct_browserbase
)
active = bool(browser_tool_enabled and available)
return current_provider, available, active, managed
if current_provider == "browser-use":
available = bool(browser_local_available and direct_browser_use)
active = bool(browser_tool_enabled and available)
return current_provider, available, active, False
if current_provider == "camofox":
return current_provider, False, False, False
current_provider = "local"
available = bool(browser_local_available)
active = bool(browser_tool_enabled and available)
return current_provider, available, active, False
if managed_browser_available or direct_browserbase:
available = bool(browser_local_available)
managed = bool(
browser_tool_enabled
and browser_local_available
and managed_browser_available
and not direct_browserbase
)
active = bool(browser_tool_enabled and available)
return "browserbase", available, active, managed
available = bool(browser_local_available)
active = bool(browser_tool_enabled and available)
return "local", available, active, False
def get_nous_subscription_features(
    config: Optional[Dict[str, object]] = None,
) -> NousSubscriptionFeatures:
    """Resolve which Nous-subscription-backed tool features are available/active.

    Combines the persisted config, environment credentials, and managed
    tool-gateway readiness to compute, per feature (web, image_gen, tts,
    browser, modal):

    * ``available``       -- could run with current credentials/config
    * ``active``          -- will actually run (toolset enabled AND available)
    * ``managed_by_nous`` -- routed through the managed Nous gateway
    * ``direct_override`` -- active on the user's own credentials instead

    Args:
        config: Optional pre-loaded config mapping; loaded from disk when None.

    Returns:
        NousSubscriptionFeatures holding subscription flags and per-feature state.
    """
    if config is None:
        config = load_config() or {}
    # Shallow copy so the caller's dict is never mutated here.
    config = dict(config)
    model_cfg = _model_config_dict(config)
    provider_is_nous = str(model_cfg.get("provider") or "").strip().lower() == "nous"
    # Auth lookup is best-effort; any failure is treated as "not logged in".
    try:
        nous_status = get_nous_auth_status()
    except Exception:
        nous_status = {}
    managed_tools_flag = managed_nous_tools_enabled()
    nous_auth_present = bool(nous_status.get("logged_in"))
    # "Subscribed" means the model provider is Nous OR valid Nous auth exists.
    subscribed = provider_is_nous or nous_auth_present
    # Per-toolset enable toggles from config (modal rides the "terminal" toolset).
    web_tool_enabled = _toolset_enabled(config, "web")
    image_tool_enabled = _toolset_enabled(config, "image_gen")
    tts_tool_enabled = _toolset_enabled(config, "tts")
    browser_tool_enabled = _toolset_enabled(config, "browser")
    modal_tool_enabled = _toolset_enabled(config, "terminal")
    # Per-feature config sections; non-dict values degrade to empty dicts.
    web_cfg = config.get("web") if isinstance(config.get("web"), dict) else {}
    tts_cfg = config.get("tts") if isinstance(config.get("tts"), dict) else {}
    browser_cfg = config.get("browser") if isinstance(config.get("browser"), dict) else {}
    terminal_cfg = config.get("terminal") if isinstance(config.get("terminal"), dict) else {}
    web_backend = str(web_cfg.get("backend") or "").strip().lower()
    tts_provider = str(tts_cfg.get("provider") or "edge").strip().lower()
    # Presence of the key (even with a falsy value) marks an explicit choice.
    browser_provider_explicit = "cloud_provider" in browser_cfg
    browser_provider = normalize_browser_cloud_provider(
        browser_cfg.get("cloud_provider") if browser_provider_explicit else None
    )
    terminal_backend = (
        str(terminal_cfg.get("backend") or "local").strip().lower()
    )
    modal_mode = normalize_modal_mode(
        terminal_cfg.get("modal_mode")
    )
    # Direct (user-owned) credentials detected from the environment.
    direct_exa = bool(get_env_value("EXA_API_KEY"))
    direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
    direct_parallel = bool(get_env_value("PARALLEL_API_KEY"))
    direct_tavily = bool(get_env_value("TAVILY_API_KEY"))
    direct_fal = bool(get_env_value("FAL_KEY"))
    direct_openai_tts = bool(resolve_openai_audio_api_key())
    direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY"))
    direct_camofox = bool(get_env_value("CAMOFOX_URL"))
    # Browserbase needs BOTH the API key and a project id to count as direct.
    direct_browserbase = bool(get_env_value("BROWSERBASE_API_KEY") and get_env_value("BROWSERBASE_PROJECT_ID"))
    direct_browser_use = bool(get_env_value("BROWSER_USE_API_KEY"))
    direct_modal = has_direct_modal_credentials()
    # Managed paths require the feature flag, Nous auth, AND a ready gateway.
    managed_web_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("firecrawl")
    managed_image_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("fal-queue")
    managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio")
    managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browserbase")
    managed_modal_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("modal")
    modal_state = resolve_modal_backend_state(
        modal_mode,
        has_direct=direct_modal,
        managed_ready=managed_modal_available,
    )
    # Web: managed only when the firecrawl backend is selected and the user
    # has no direct Firecrawl credentials (direct creds take precedence).
    web_managed = web_backend == "firecrawl" and managed_web_available and not direct_firecrawl
    web_active = bool(
        web_tool_enabled
        and (
            web_managed
            or (web_backend == "exa" and direct_exa)
            or (web_backend == "firecrawl" and direct_firecrawl)
            or (web_backend == "parallel" and direct_parallel)
            or (web_backend == "tavily" and direct_tavily)
        )
    )
    web_available = bool(
        managed_web_available or direct_exa or direct_firecrawl or direct_parallel or direct_tavily
    )
    # Image generation: direct FAL key overrides the managed gateway.
    image_managed = image_tool_enabled and managed_image_available and not direct_fal
    image_active = bool(image_tool_enabled and (image_managed or direct_fal))
    image_available = bool(managed_image_available or direct_fal)
    tts_current_provider = tts_provider or "edge"
    # TTS: managed only for the OpenAI provider without a direct OpenAI key.
    tts_managed = (
        tts_tool_enabled
        and tts_current_provider == "openai"
        and managed_tts_available
        and not direct_openai_tts
    )
    # Edge and NeuTTS never need credentials, so they are always available.
    tts_available = bool(
        tts_current_provider in {"edge", "neutts"}
        or (tts_current_provider == "openai" and (managed_tts_available or direct_openai_tts))
        or (tts_current_provider == "elevenlabs" and direct_elevenlabs)
    )
    tts_active = bool(tts_tool_enabled and tts_available)
    # Browser automation also requires the local agent-browser binary.
    browser_local_available = _has_agent_browser()
    (
        browser_current_provider,
        browser_available,
        browser_active,
        browser_managed,
    ) = _resolve_browser_feature_state(
        browser_tool_enabled=browser_tool_enabled,
        browser_provider=browser_provider,
        browser_provider_explicit=browser_provider_explicit,
        browser_local_available=browser_local_available,
        direct_camofox=direct_camofox,
        direct_browserbase=direct_browserbase,
        direct_browser_use=direct_browser_use,
        managed_browser_available=managed_browser_available,
    )
    if terminal_backend != "modal":
        # Non-modal terminal backends: Modal state is trivially "available",
        # active iff the terminal toolset is enabled, never managed.
        modal_managed = False
        modal_available = True
        modal_active = bool(modal_tool_enabled)
        modal_direct_override = False
    elif modal_state["selected_backend"] == "managed":
        # Modal billed through the Nous subscription.
        modal_managed = bool(modal_tool_enabled)
        modal_available = True
        modal_active = bool(modal_tool_enabled)
        modal_direct_override = False
    elif modal_state["selected_backend"] == "direct":
        # Modal running on the user's own Modal credentials.
        modal_managed = False
        modal_available = True
        modal_active = bool(modal_tool_enabled)
        modal_direct_override = bool(modal_tool_enabled)
    elif modal_mode == "managed":
        # Managed mode requested but no backend selected: report whether the
        # managed gateway could serve it, without activating.
        modal_managed = False
        modal_available = bool(managed_modal_available)
        modal_active = False
        modal_direct_override = False
    elif modal_mode == "direct":
        # Direct mode requested but no backend selected.
        modal_managed = False
        modal_available = bool(direct_modal)
        modal_active = False
        modal_direct_override = False
    else:
        # Auto mode with no backend selected: available if either path could work.
        modal_managed = False
        modal_available = bool(managed_modal_available or direct_modal)
        modal_active = False
        modal_direct_override = False
    # TTS counts as explicitly configured only when the user set a
    # non-default provider key in the raw (unsanitized) config section.
    tts_explicit_configured = False
    raw_tts_cfg = config.get("tts")
    if isinstance(raw_tts_cfg, dict) and "provider" in raw_tts_cfg:
        tts_explicit_configured = tts_provider not in {"", "edge"}
    features = {
        "web": NousFeatureState(
            key="web",
            label="Web tools",
            included_by_default=True,
            available=web_available,
            active=web_active,
            managed_by_nous=web_managed,
            direct_override=web_active and not web_managed,
            toolset_enabled=web_tool_enabled,
            current_provider=web_backend or "",
            explicit_configured=bool(web_backend),
        ),
        "image_gen": NousFeatureState(
            key="image_gen",
            label="Image generation",
            included_by_default=True,
            available=image_available,
            active=image_active,
            managed_by_nous=image_managed,
            direct_override=image_active and not image_managed,
            toolset_enabled=image_tool_enabled,
            current_provider="FAL" if direct_fal else ("Nous Subscription" if image_managed else ""),
            explicit_configured=direct_fal,
        ),
        "tts": NousFeatureState(
            key="tts",
            label="OpenAI TTS",
            included_by_default=True,
            available=tts_available,
            active=tts_active,
            managed_by_nous=tts_managed,
            direct_override=tts_active and not tts_managed,
            toolset_enabled=tts_tool_enabled,
            current_provider=_tts_label(tts_current_provider),
            explicit_configured=tts_explicit_configured,
        ),
        "browser": NousFeatureState(
            key="browser",
            label="Browser automation",
            included_by_default=True,
            available=browser_available,
            active=browser_active,
            managed_by_nous=browser_managed,
            direct_override=browser_active and not browser_managed,
            toolset_enabled=browser_tool_enabled,
            current_provider=_browser_label(browser_current_provider),
            explicit_configured=browser_provider_explicit,
        ),
        "modal": NousFeatureState(
            key="modal",
            label="Modal execution",
            included_by_default=False,
            available=modal_available,
            active=modal_active,
            managed_by_nous=modal_managed,
            direct_override=terminal_backend == "modal" and modal_direct_override,
            toolset_enabled=modal_tool_enabled,
            current_provider="Modal" if terminal_backend == "modal" else terminal_backend or "local",
            explicit_configured=terminal_backend == "modal",
        ),
    }
    return NousSubscriptionFeatures(
        subscribed=subscribed,
        nous_auth_present=nous_auth_present,
        provider_is_nous=provider_is_nous,
        features=features,
    )
def get_nous_subscription_explainer_lines() -> list[str]:
    """Return explainer strings shown when Nous-managed tools are enabled.

    Returns an empty list when the managed-tools feature flag is off.
    """
    if managed_nous_tools_enabled():
        return [
            "Nous subscription enables managed web tools, image generation, OpenAI TTS, and browser automation by default.",
            "Those managed tools bill to your Nous subscription. Modal execution is optional and can bill to your subscription too.",
            "Change these later with: hermes setup tools, hermes setup terminal, or hermes status.",
        ]
    return []
def apply_nous_provider_defaults(config: Dict[str, object]) -> set[str]:
    """Apply provider-level Nous defaults shared by `hermes setup` and `hermes model`."""
    changed: set[str] = set()
    if not managed_nous_tools_enabled():
        return changed
    if not get_nous_subscription_features(config).provider_is_nous:
        return changed
    section = config.get("tts")
    if not isinstance(section, dict):
        # Replace a missing/invalid tts section with a fresh dict in place.
        section = {}
        config["tts"] = section
    configured = str(section.get("provider") or "edge").strip().lower()
    # Only override when the user still has the shipped default (Edge TTS).
    if configured in {"", "edge"}:
        section["provider"] = "openai"
        changed.add("tts")
    return changed
def apply_nous_managed_defaults(
    config: Dict[str, object],
    *,
    enabled_toolsets: Optional[Iterable[str]] = None,
) -> set[str]:
    """Fill in Nous-managed tool defaults for the toolsets the user selected.

    Mutates ``config`` in place and returns the set of toolset keys whose
    configuration changed. Applies only when managed Nous tools are enabled
    and the model provider is ``nous``; never overrides an explicit provider
    choice or existing direct API credentials.
    """
    if not managed_nous_tools_enabled():
        return set()
    features = get_nous_subscription_features(config)
    if not features.provider_is_nous:
        return set()

    chosen = set(enabled_toolsets or ())
    touched: set[str] = set()

    def _section(key: str):
        # Fetch a config sub-dict, installing a fresh one when absent/invalid.
        section = config.get(key)
        if not isinstance(section, dict):
            section = {}
            config[key] = section
        return section

    web_cfg = _section("web")
    tts_cfg = _section("tts")
    browser_cfg = _section("browser")

    if "web" in chosen and not features.web.explicit_configured:
        direct_web = (
            get_env_value("PARALLEL_API_KEY")
            or get_env_value("TAVILY_API_KEY")
            or get_env_value("FIRECRAWL_API_KEY")
            or get_env_value("FIRECRAWL_API_URL")
        )
        if not direct_web:
            web_cfg["backend"] = "firecrawl"
            touched.add("web")

    if "tts" in chosen and not features.tts.explicit_configured:
        direct_tts = (
            resolve_openai_audio_api_key()
            or get_env_value("ELEVENLABS_API_KEY")
        )
        if not direct_tts:
            tts_cfg["provider"] = "openai"
            touched.add("tts")

    if "browser" in chosen and not features.browser.explicit_configured:
        direct_browser = (
            get_env_value("BROWSERBASE_API_KEY")
            or get_env_value("BROWSER_USE_API_KEY")
        )
        if not direct_browser:
            browser_cfg["cloud_provider"] = "browserbase"
            touched.add("browser")

    # Image generation has no config section to write; just record the change.
    if "image_gen" in chosen and not get_env_value("FAL_KEY"):
        touched.add("image_gen")

    return touched

View File

@@ -38,8 +38,6 @@ from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Set
from utils import env_var_enabled
try:
import yaml
except ImportError: # pragma: no cover yaml is optional at import time
@@ -67,7 +65,7 @@ _NS_PARENT = "hermes_plugins"
def _env_enabled(name: str) -> bool:
"""Return True when an env var is set to a truthy opt-in value."""
return env_var_enabled(name)
return os.getenv(name, "").strip().lower() in {"1", "true", "yes", "on"}
def _get_disabled_plugins() -> set:

View File

@@ -82,27 +82,9 @@ def _get_model_config() -> Dict[str, Any]:
return {}
def _provider_supports_explicit_api_mode(provider: Optional[str], configured_provider: Optional[str] = None) -> bool:
"""Check whether a persisted api_mode should be honored for a given provider.
Prevents stale api_mode from a previous provider leaking into a
different one after a model/provider switch. Only applies the
persisted mode when the config's provider matches the runtime
provider (or when no configured provider is recorded).
"""
normalized_provider = (provider or "").strip().lower()
normalized_configured = (configured_provider or "").strip().lower()
if not normalized_configured:
return True
if normalized_provider == "custom":
return normalized_configured == "custom" or normalized_configured.startswith("custom:")
return normalized_configured == normalized_provider
def _copilot_runtime_api_mode(model_cfg: Dict[str, Any], api_key: str) -> str:
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
if configured_mode and _provider_supports_explicit_api_mode("copilot", configured_provider):
if configured_mode:
return configured_mode
model_name = str(model_cfg.get("default") or "").strip()
@@ -158,13 +140,9 @@ def _resolve_runtime_from_pool_entry(
elif provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
else:
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
if configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
if configured_mode:
api_mode = configured_mode
elif provider in ("opencode-zen", "opencode-go"):
from hermes_cli.models import opencode_model_api_mode
api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
elif base_url.rstrip("/").endswith("/anthropic"):
api_mode = "anthropic_messages"
@@ -688,14 +666,10 @@ def resolve_runtime_provider(
if provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", ""))
else:
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
# Only honor persisted api_mode when it belongs to the same provider family.
# Check explicit api_mode from model config first
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
if configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
if configured_mode:
api_mode = configured_mode
elif provider in ("opencode-zen", "opencode-go"):
from hermes_cli.models import opencode_model_api_mode
api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
# Auto-detect Anthropic-compatible endpoints by URL convention
# (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic)
elif base_url.rstrip("/").endswith("/anthropic"):

View File

@@ -18,12 +18,6 @@ import sys
from pathlib import Path
from typing import Optional, Dict, Any
from hermes_cli.nous_subscription import (
apply_nous_provider_defaults,
get_nous_subscription_explainer_lines,
get_nous_subscription_features,
)
from tools.tool_backend_helpers import managed_nous_tools_enabled
from hermes_constants import get_optional_skills_dir
logger = logging.getLogger(__name__)
@@ -114,8 +108,6 @@ _DEFAULT_PROVIDER_MODELS = {
"minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
"ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
"kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
"opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
"opencode-go": ["glm-5", "kimi-k2.5", "minimax-m2.5", "minimax-m2.7"],
"huggingface": [
"Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
"Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
@@ -191,8 +183,6 @@ def _setup_provider_model_selection(config, provider_id, current_model, prompt_c
fetch_api_models,
fetch_github_model_catalog,
normalize_copilot_model_id,
normalize_opencode_model_id,
opencode_model_api_mode,
)
pconfig = PROVIDER_REGISTRY[provider_id]
@@ -246,11 +236,6 @@ def _setup_provider_model_selection(config, provider_id, current_model, prompt_c
f" Use \"Custom model\" if the model you expect isn't listed."
)
if provider_id in {"opencode-zen", "opencode-go"}:
provider_models = [normalize_opencode_model_id(provider_id, mid) for mid in provider_models]
current_model = normalize_opencode_model_id(provider_id, current_model)
provider_models = list(dict.fromkeys(mid for mid in provider_models if mid))
model_choices = list(provider_models)
model_choices.append("Custom model")
model_choices.append(f"Keep current ({current_model})")
@@ -268,8 +253,6 @@ def _setup_provider_model_selection(config, provider_id, current_model, prompt_c
catalog=catalog,
api_key=api_key,
) or selected_model
elif provider_id in {"opencode-zen", "opencode-go"}:
selected_model = normalize_opencode_model_id(provider_id, selected_model)
_set_default_model(config, selected_model)
elif model_idx == len(provider_models):
custom = prompt_fn("Enter model name")
@@ -280,8 +263,6 @@ def _setup_provider_model_selection(config, provider_id, current_model, prompt_c
catalog=catalog,
api_key=api_key,
) or custom
elif provider_id in {"opencode-zen", "opencode-go"}:
selected_model = normalize_opencode_model_id(provider_id, custom)
else:
selected_model = custom
_set_default_model(config, selected_model)
@@ -313,10 +294,6 @@ def _setup_provider_model_selection(config, provider_id, current_model, prompt_c
catalog=catalog,
api_key=api_key,
)
elif provider_id in {"opencode-zen", "opencode-go"} and selected_model:
model_cfg = _model_config_dict(config)
model_cfg["api_mode"] = opencode_model_api_mode(provider_id, selected_model)
config["model"] = model_cfg
def _sync_model_from_disk(config: Dict[str, Any]) -> None:
@@ -617,7 +594,6 @@ def _print_setup_summary(config: dict, hermes_home):
print_header("Tool Availability Summary")
tool_status = []
subscription_features = get_nous_subscription_features(config)
# Vision — use the same runtime resolver as the actual vision tools
try:
@@ -639,61 +615,42 @@ def _print_setup_summary(config: dict, hermes_home):
tool_status.append(("Mixture of Agents", False, "OPENROUTER_API_KEY"))
# Web tools (Exa, Parallel, Firecrawl, or Tavily)
if subscription_features.web.managed_by_nous:
tool_status.append(("Web Search & Extract (Nous subscription)", True, None))
elif subscription_features.web.available:
label = "Web Search & Extract"
if subscription_features.web.current_provider:
label = f"Web Search & Extract ({subscription_features.web.current_provider})"
tool_status.append((label, True, None))
if get_env_value("EXA_API_KEY") or get_env_value("PARALLEL_API_KEY") or get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL") or get_env_value("TAVILY_API_KEY"):
tool_status.append(("Web Search & Extract", True, None))
else:
tool_status.append(("Web Search & Extract", False, "EXA_API_KEY, PARALLEL_API_KEY, FIRECRAWL_API_KEY/FIRECRAWL_API_URL, or TAVILY_API_KEY"))
tool_status.append(("Web Search & Extract", False, "EXA_API_KEY, PARALLEL_API_KEY, FIRECRAWL_API_KEY, or TAVILY_API_KEY"))
# Browser tools (local Chromium, Camofox, Browserbase, or Browser Use)
browser_provider = subscription_features.browser.current_provider
if subscription_features.browser.managed_by_nous:
tool_status.append(("Browser Automation (Nous Browserbase)", True, None))
elif subscription_features.browser.available:
label = "Browser Automation"
if browser_provider:
label = f"Browser Automation ({browser_provider})"
tool_status.append((label, True, None))
# Browser tools (local Chromium or Browserbase cloud)
import shutil
_ab_found = (
shutil.which("agent-browser")
or (
Path(__file__).parent.parent / "node_modules" / ".bin" / "agent-browser"
).exists()
)
if get_env_value("CAMOFOX_URL"):
tool_status.append(("Browser Automation (Camofox)", True, None))
elif get_env_value("BROWSERBASE_API_KEY"):
tool_status.append(("Browser Automation (Browserbase)", True, None))
elif _ab_found:
tool_status.append(("Browser Automation (local)", True, None))
else:
missing_browser_hint = "npm install -g agent-browser, set CAMOFOX_URL, or configure Browserbase"
if browser_provider == "Browserbase":
missing_browser_hint = (
"npm install -g agent-browser and set "
"BROWSERBASE_API_KEY/BROWSERBASE_PROJECT_ID"
)
elif browser_provider == "Browser Use":
missing_browser_hint = (
"npm install -g agent-browser and set BROWSER_USE_API_KEY"
)
elif browser_provider == "Camofox":
missing_browser_hint = "CAMOFOX_URL"
elif browser_provider == "Local browser":
missing_browser_hint = "npm install -g agent-browser"
tool_status.append(
("Browser Automation", False, missing_browser_hint)
("Browser Automation", False, "npm install -g agent-browser or set CAMOFOX_URL")
)
# FAL (image generation)
if subscription_features.image_gen.managed_by_nous:
tool_status.append(("Image Generation (Nous subscription)", True, None))
elif subscription_features.image_gen.available:
if get_env_value("FAL_KEY"):
tool_status.append(("Image Generation", True, None))
else:
tool_status.append(("Image Generation", False, "FAL_KEY"))
# TTS — show configured provider
tts_provider = config.get("tts", {}).get("provider", "edge")
if subscription_features.tts.managed_by_nous:
tool_status.append(("Text-to-Speech (OpenAI via Nous subscription)", True, None))
elif tts_provider == "elevenlabs" and get_env_value("ELEVENLABS_API_KEY"):
if tts_provider == "elevenlabs" and get_env_value("ELEVENLABS_API_KEY"):
tool_status.append(("Text-to-Speech (ElevenLabs)", True, None))
elif tts_provider == "openai" and (
get_env_value("VOICE_TOOLS_OPENAI_KEY") or get_env_value("OPENAI_API_KEY")
):
elif tts_provider == "openai" and get_env_value("VOICE_TOOLS_OPENAI_KEY"):
tool_status.append(("Text-to-Speech (OpenAI)", True, None))
elif tts_provider == "neutts":
try:
@@ -708,16 +665,6 @@ def _print_setup_summary(config: dict, hermes_home):
else:
tool_status.append(("Text-to-Speech (Edge TTS)", True, None))
if subscription_features.modal.managed_by_nous:
tool_status.append(("Modal Execution (Nous subscription)", True, None))
elif config.get("terminal", {}).get("backend") == "modal":
if subscription_features.modal.direct_override:
tool_status.append(("Modal Execution (direct Modal)", True, None))
else:
tool_status.append(("Modal Execution", False, "run 'hermes setup terminal'"))
elif managed_nous_tools_enabled() and subscription_features.nous_auth_present:
tool_status.append(("Modal Execution (optional via Nous subscription)", True, None))
# Tinker + WandB (RL training)
if get_env_value("TINKER_API_KEY") and get_env_value("WANDB_API_KEY"):
tool_status.append(("RL Training (Tinker)", True, None))
@@ -927,7 +874,6 @@ def setup_model_provider(config: dict):
if isinstance(_m, dict):
selected_provider = _m.get("provider")
nous_subscription_selected = selected_provider == "nous"
# ── Same-provider fallback & rotation setup ──
if _supports_same_provider_pool_setup(selected_provider):
@@ -1093,20 +1039,10 @@ def setup_model_provider(config: dict):
print_info("Skipped — add later with 'hermes setup' or configure AUXILIARY_VISION_* settings")
if selected_provider == "nous" and nous_subscription_selected:
changed_defaults = apply_nous_provider_defaults(config)
current_tts = str(config.get("tts", {}).get("provider") or "edge")
if "tts" in changed_defaults:
print_success("TTS provider set to: OpenAI TTS via your Nous subscription")
else:
print_info(f"Keeping your existing TTS provider: {current_tts}")
save_config(config)
# Offer TTS provider selection at the end of model setup, except when
# Nous subscription defaults are already being applied.
if selected_provider != "nous":
_setup_tts_provider(config)
# Offer TTS provider selection at the end of model setup
_setup_tts_provider(config)
# =============================================================================
@@ -1174,7 +1110,6 @@ def _setup_tts_provider(config: dict):
"""Interactive TTS provider selection with install flow for NeuTTS."""
tts_config = config.get("tts", {})
current_provider = tts_config.get("provider", "edge")
subscription_features = get_nous_subscription_features(config)
provider_labels = {
"edge": "Edge TTS",
@@ -1189,36 +1124,20 @@ def _setup_tts_provider(config: dict):
print_info(f"Current: {current_label}")
print()
choices = []
providers = []
if managed_nous_tools_enabled() and subscription_features.nous_auth_present:
choices.append("Nous Subscription (managed OpenAI TTS, billed to your subscription)")
providers.append("nous-openai")
choices.extend(
[
"Edge TTS (free, cloud-based, no setup needed)",
"ElevenLabs (premium quality, needs API key)",
"OpenAI TTS (good quality, needs API key)",
"NeuTTS (local on-device, free, ~300MB model download)",
]
)
providers.extend(["edge", "elevenlabs", "openai", "neutts"])
choices.append(f"Keep current ({current_label})")
keep_current_idx = len(choices) - 1
idx = prompt_choice("Select TTS provider:", choices, keep_current_idx)
choices = [
"Edge TTS (free, cloud-based, no setup needed)",
"ElevenLabs (premium quality, needs API key)",
"OpenAI TTS (good quality, needs API key)",
"NeuTTS (local on-device, free, ~300MB model download)",
f"Keep current ({current_label})",
]
idx = prompt_choice("Select TTS provider:", choices, len(choices) - 1)
if idx == keep_current_idx:
if idx == 4: # Keep current
return
providers = ["edge", "elevenlabs", "openai", "neutts"]
selected = providers[idx]
selected_via_nous = selected == "nous-openai"
if selected == "nous-openai":
selected = "openai"
print_info("OpenAI TTS will use the managed Nous gateway and bill to your subscription.")
if get_env_value("VOICE_TOOLS_OPENAI_KEY") or get_env_value("OPENAI_API_KEY"):
print_warning(
"Direct OpenAI credentials are still configured and may take precedence until removed from ~/.hermes/.env."
)
if selected == "neutts":
# Check if already installed
@@ -1256,8 +1175,8 @@ def _setup_tts_provider(config: dict):
print_warning("No API key provided. Falling back to Edge TTS.")
selected = "edge"
elif selected == "openai" and not selected_via_nous:
existing = get_env_value("VOICE_TOOLS_OPENAI_KEY") or get_env_value("OPENAI_API_KEY")
elif selected == "openai":
existing = get_env_value("VOICE_TOOLS_OPENAI_KEY")
if not existing:
print()
api_key = prompt("OpenAI API key for TTS", password=True)
@@ -1412,99 +1331,63 @@ def setup_terminal_backend(config: dict):
elif selected_backend == "modal":
print_success("Terminal backend: Modal")
print_info("Serverless cloud sandboxes. Each session gets its own container.")
from tools.managed_tool_gateway import is_managed_tool_gateway_ready
from tools.tool_backend_helpers import normalize_modal_mode
print_info("Requires a Modal account: https://modal.com")
managed_modal_available = bool(
managed_nous_tools_enabled()
and
get_nous_subscription_features(config).nous_auth_present
and is_managed_tool_gateway_ready("modal")
)
modal_mode = normalize_modal_mode(config.get("terminal", {}).get("modal_mode"))
use_managed_modal = False
if managed_modal_available:
modal_choices = [
"Use my Nous subscription",
"Use my own Modal account",
]
if modal_mode == "managed":
default_modal_idx = 0
elif modal_mode == "direct":
default_modal_idx = 1
else:
default_modal_idx = 1 if get_env_value("MODAL_TOKEN_ID") else 0
modal_mode_idx = prompt_choice(
"Select how Modal execution should be billed:",
modal_choices,
default_modal_idx,
)
use_managed_modal = modal_mode_idx == 0
# Check if modal SDK is installed
try:
__import__("modal")
except ImportError:
print_info("Installing modal SDK...")
import subprocess
if use_managed_modal:
config["terminal"]["modal_mode"] = "managed"
print_info("Modal execution will use the managed Nous gateway and bill to your subscription.")
if get_env_value("MODAL_TOKEN_ID") or get_env_value("MODAL_TOKEN_SECRET"):
print_info(
"Direct Modal credentials are still configured, but this backend is pinned to managed mode."
uv_bin = shutil.which("uv")
if uv_bin:
result = subprocess.run(
[
uv_bin,
"pip",
"install",
"--python",
sys.executable,
"modal",
],
capture_output=True,
text=True,
)
else:
config["terminal"]["modal_mode"] = "direct"
print_info("Requires a Modal account: https://modal.com")
# Check if modal SDK is installed
try:
__import__("modal")
except ImportError:
print_info("Installing modal SDK...")
import subprocess
uv_bin = shutil.which("uv")
if uv_bin:
result = subprocess.run(
[
uv_bin,
"pip",
"install",
"--python",
sys.executable,
"modal",
],
capture_output=True,
text=True,
)
else:
result = subprocess.run(
[sys.executable, "-m", "pip", "install", "modal"],
capture_output=True,
text=True,
)
if result.returncode == 0:
print_success("modal SDK installed")
else:
print_warning("Install failed — run manually: pip install modal")
# Modal token
print()
print_info("Modal authentication:")
print_info(" Get your token at: https://modal.com/settings")
existing_token = get_env_value("MODAL_TOKEN_ID")
if existing_token:
print_info(" Modal token: already configured")
if prompt_yes_no(" Update Modal credentials?", False):
token_id = prompt(" Modal Token ID", password=True)
token_secret = prompt(" Modal Token Secret", password=True)
if token_id:
save_env_value("MODAL_TOKEN_ID", token_id)
if token_secret:
save_env_value("MODAL_TOKEN_SECRET", token_secret)
else:
result = subprocess.run(
[sys.executable, "-m", "pip", "install", "modal"],
capture_output=True,
text=True,
)
if result.returncode == 0:
print_success("modal SDK installed")
else:
print_warning(
"Install failed — run manually: pip install modal"
)
# Modal token
print()
print_info("Modal authentication:")
print_info(" Get your token at: https://modal.com/settings")
existing_token = get_env_value("MODAL_TOKEN_ID")
if existing_token:
print_info(" Modal token: already configured")
if prompt_yes_no(" Update Modal credentials?", False):
token_id = prompt(" Modal Token ID", password=True)
token_secret = prompt(" Modal Token Secret", password=True)
if token_id:
save_env_value("MODAL_TOKEN_ID", token_id)
if token_secret:
save_env_value("MODAL_TOKEN_SECRET", token_secret)
else:
token_id = prompt(" Modal Token ID", password=True)
token_secret = prompt(" Modal Token Secret", password=True)
if token_id:
save_env_value("MODAL_TOKEN_ID", token_id)
if token_secret:
save_env_value("MODAL_TOKEN_SECRET", token_secret)
_prompt_container_resources(config)
@@ -1618,8 +1501,6 @@ def setup_terminal_backend(config: dict):
# Sync terminal backend to .env so terminal_tool picks it up directly.
# config.yaml is the source of truth, but terminal_tool reads TERMINAL_ENV.
save_env_value("TERMINAL_ENV", selected_backend)
if selected_backend == "modal":
save_env_value("TERMINAL_MODAL_MODE", config["terminal"].get("modal_mode", "auto"))
save_config(config)
print()
print_success(f"Terminal backend set to: {selected_backend}")
@@ -1825,23 +1706,14 @@ def setup_gateway(config: dict):
print_info(" 1. Message @userinfobot on Telegram")
print_info(" 2. It will reply with your numeric ID (e.g., 123456789)")
print()
existing_allowlist = get_env_value("TELEGRAM_ALLOWED_USERS")
if existing_allowlist:
print_info(f" Current allowlist: {existing_allowlist}")
allowed_users = prompt(
"Allowed user IDs (comma-separated, leave empty to "
+ ("keep current" if existing_allowlist else "allow open access")
+ ")"
"Allowed user IDs (comma-separated, leave empty for open access)"
)
if allowed_users:
save_env_value("TELEGRAM_ALLOWED_USERS", allowed_users.replace(" ", ""))
print_success(
"Telegram allowlist configured - only listed users can use the bot"
)
elif existing_allowlist:
print_success(
f"Keeping existing Telegram allowlist: {existing_allowlist}"
)
else:
print_info(
"⚠️ No allowlist set - anyone who finds your bot can use it!"
@@ -1913,13 +1785,8 @@ def setup_gateway(config: dict):
" You can also use Discord usernames (resolved on gateway start)."
)
print()
existing_allowlist = get_env_value("DISCORD_ALLOWED_USERS")
if existing_allowlist:
print_info(f" Current allowlist: {existing_allowlist}")
allowed_users = prompt(
"Allowed user IDs or usernames (comma-separated, leave empty to "
+ ("keep current" if existing_allowlist else "allow open access")
+ ")"
"Allowed user IDs or usernames (comma-separated, leave empty for open access)"
)
if allowed_users:
# Clean up common prefixes (user:123, <@123>, <@!123>)
@@ -1934,10 +1801,6 @@ def setup_gateway(config: dict):
cleaned_ids.append(uid)
save_env_value("DISCORD_ALLOWED_USERS", ",".join(cleaned_ids))
print_success("Discord allowlist configured")
elif existing_allowlist:
print_success(
f"Keeping existing Discord allowlist: {existing_allowlist}"
)
else:
print_info(
"⚠️ No allowlist set - anyone in servers with your bot can use it!"
@@ -2034,21 +1897,12 @@ def setup_gateway(config: dict):
" To find a Member ID: click a user's name → View full profile → ⋮ → Copy member ID"
)
print()
existing_allowlist = get_env_value("SLACK_ALLOWED_USERS")
if existing_allowlist:
print_info(f" Current allowlist: {existing_allowlist}")
allowed_users = prompt(
"Allowed user IDs (comma-separated, leave empty to "
+ ("keep current" if existing_allowlist else "deny everyone except paired users")
+ ")"
"Allowed user IDs (comma-separated, leave empty to deny everyone except paired users)"
)
if allowed_users:
save_env_value("SLACK_ALLOWED_USERS", allowed_users.replace(" ", ""))
print_success("Slack allowlist configured")
elif existing_allowlist:
print_success(
f"Keeping existing Slack allowlist: {existing_allowlist}"
)
else:
print_warning(
"⚠️ No Slack allowlist set - unpaired users will be denied by default."
@@ -2132,21 +1986,12 @@ def setup_gateway(config: dict):
print_info("🔒 Security: Restrict who can use your bot")
print_info(" Matrix user IDs look like @username:server")
print()
existing_allowlist = get_env_value("MATRIX_ALLOWED_USERS")
if existing_allowlist:
print_info(f" Current allowlist: {existing_allowlist}")
allowed_users = prompt(
"Allowed user IDs (comma-separated, leave empty to "
+ ("keep current" if existing_allowlist else "allow open access")
+ ")"
"Allowed user IDs (comma-separated, leave empty for open access)"
)
if allowed_users:
save_env_value("MATRIX_ALLOWED_USERS", allowed_users.replace(" ", ""))
print_success("Matrix allowlist configured")
elif existing_allowlist:
print_success(
f"Keeping existing Matrix allowlist: {existing_allowlist}"
)
else:
print_info(
"⚠️ No allowlist set - anyone who can message the bot can use it!"
@@ -2187,21 +2032,12 @@ def setup_gateway(config: dict):
print_info(" To find your user ID: click your avatar → Profile")
print_info(" or use the API: GET /api/v4/users/me")
print()
existing_allowlist = get_env_value("MATTERMOST_ALLOWED_USERS")
if existing_allowlist:
print_info(f" Current allowlist: {existing_allowlist}")
allowed_users = prompt(
"Allowed user IDs (comma-separated, leave empty to "
+ ("keep current" if existing_allowlist else "allow open access")
+ ")"
"Allowed user IDs (comma-separated, leave empty for open access)"
)
if allowed_users:
save_env_value("MATTERMOST_ALLOWED_USERS", allowed_users.replace(" ", ""))
print_success("Mattermost allowlist configured")
elif existing_allowlist:
print_success(
f"Keeping existing Mattermost allowlist: {existing_allowlist}"
)
else:
print_info(
"⚠️ No allowlist set - anyone who can message the bot can use it!"
@@ -2636,17 +2472,6 @@ SETUP_SECTIONS = [
("agent", "Agent Settings", setup_agent_settings),
]
# The returning-user menu intentionally omits standalone TTS because model setup
# already includes TTS selection and tools setup covers the rest of the provider
# configuration. Keep this list in the same order as the visible menu entries.
RETURNING_USER_MENU_SECTION_KEYS = [
"model",
"terminal",
"gateway",
"tools",
"agent",
]
def run_setup_wizard(args):
"""Run the interactive setup wizard.
@@ -2797,7 +2622,8 @@ def run_setup_wizard(args):
# Individual section — map by key, not by position.
# SETUP_SECTIONS includes TTS but the returning-user menu skips it,
# so positional indexing (choice - 3) would dispatch the wrong section.
section_key = RETURNING_USER_MENU_SECTION_KEYS[choice - 3]
_RETURNING_USER_SECTION_KEYS = ["model", "terminal", "gateway", "tools", "agent"]
section_key = _RETURNING_USER_SECTION_KEYS[choice - 3]
section = next((s for s in SETUP_SECTIONS if s[0] == section_key), None)
if section:
_, label, func = section

View File

@@ -30,7 +30,6 @@ PLATFORMS = {
"dingtalk": "💬 DingTalk",
"feishu": "🪽 Feishu",
"wecom": "💬 WeCom",
"webhook": "🔗 Webhook",
}
# ─── Config Helpers ───────────────────────────────────────────────────────────

View File

@@ -15,10 +15,8 @@ from hermes_cli.auth import AuthError, resolve_provider
from hermes_cli.colors import Colors, color
from hermes_cli.config import get_env_path, get_env_value, get_hermes_home, load_config
from hermes_cli.models import provider_label
from hermes_cli.nous_subscription import get_nous_subscription_features
from hermes_cli.runtime_provider import resolve_requested_provider
from hermes_constants import OPENROUTER_MODELS_URL
from tools.tool_backend_helpers import managed_nous_tools_enabled
def check_mark(ok: bool) -> str:
if ok:
@@ -188,31 +186,6 @@ def show_status(args):
if codex_status.get("error") and not codex_logged_in:
print(f" Error: {codex_status.get('error')}")
# =========================================================================
# Nous Subscription Features
# =========================================================================
if managed_nous_tools_enabled():
features = get_nous_subscription_features(config)
print()
print(color("◆ Nous Subscription Features", Colors.CYAN, Colors.BOLD))
if not features.nous_auth_present:
print(" Nous Portal ✗ not logged in")
else:
print(" Nous Portal ✓ managed tools available")
for feature in features.items():
if feature.managed_by_nous:
state = "active via Nous subscription"
elif feature.active:
current = feature.current_provider or "configured provider"
state = f"active via {current}"
elif feature.included_by_default and features.nous_auth_present:
state = "included by subscription, not currently selected"
elif feature.key == "modal" and features.nous_auth_present:
state = "available via subscription (optional)"
else:
state = "not configured"
print(f" {feature.label:<15} {check_mark(feature.available or feature.active or feature.managed_by_nous)} {state}")
# =========================================================================
# API-Key Providers
# =========================================================================

View File

@@ -20,11 +20,6 @@ from hermes_cli.config import (
load_config, save_config, get_env_value, save_env_value,
)
from hermes_cli.colors import Colors, color
from hermes_cli.nous_subscription import (
apply_nous_managed_defaults,
get_nous_subscription_features,
)
from tools.tool_backend_helpers import managed_nous_tools_enabled
logger = logging.getLogger(__name__)
@@ -150,7 +145,6 @@ PLATFORMS = {
"wecom": {"label": "💬 WeCom", "default_toolset": "hermes-wecom"},
"api_server": {"label": "🌐 API Server", "default_toolset": "hermes-api-server"},
"mattermost": {"label": "💬 Mattermost", "default_toolset": "hermes-mattermost"},
"webhook": {"label": "🔗 Webhook", "default_toolset": "hermes-webhook"},
}
@@ -164,15 +158,6 @@ TOOL_CATEGORIES = {
"name": "Text-to-Speech",
"icon": "🔊",
"providers": [
{
"name": "Nous Subscription",
"tag": "Managed OpenAI TTS billed to your subscription",
"env_vars": [],
"tts_provider": "openai",
"requires_nous_auth": True,
"managed_nous_feature": "tts",
"override_env_vars": ["VOICE_TOOLS_OPENAI_KEY", "OPENAI_API_KEY"],
},
{
"name": "Microsoft Edge TTS",
"tag": "Free - no API key needed",
@@ -203,15 +188,6 @@ TOOL_CATEGORIES = {
"setup_note": "A free DuckDuckGo search skill is also included — skip this if you don't need a premium provider.",
"icon": "🔍",
"providers": [
{
"name": "Nous Subscription",
"tag": "Managed Firecrawl billed to your subscription",
"web_backend": "firecrawl",
"env_vars": [],
"requires_nous_auth": True,
"managed_nous_feature": "web",
"override_env_vars": ["FIRECRAWL_API_KEY", "FIRECRAWL_API_URL"],
},
{
"name": "Firecrawl Cloud",
"tag": "Hosted service - search, extract, and crawl",
@@ -258,14 +234,6 @@ TOOL_CATEGORIES = {
"name": "Image Generation",
"icon": "🎨",
"providers": [
{
"name": "Nous Subscription",
"tag": "Managed FAL image generation billed to your subscription",
"env_vars": [],
"requires_nous_auth": True,
"managed_nous_feature": "image_gen",
"override_env_vars": ["FAL_KEY"],
},
{
"name": "FAL.ai",
"tag": "FLUX 2 Pro with auto-upscaling",
@@ -279,21 +247,11 @@ TOOL_CATEGORIES = {
"name": "Browser Automation",
"icon": "🌐",
"providers": [
{
"name": "Nous Subscription (Browserbase cloud)",
"tag": "Managed Browserbase billed to your subscription",
"env_vars": [],
"browser_provider": "browserbase",
"requires_nous_auth": True,
"managed_nous_feature": "browser",
"override_env_vars": ["BROWSERBASE_API_KEY", "BROWSERBASE_PROJECT_ID"],
"post_setup": "browserbase",
},
{
"name": "Local Browser",
"tag": "Free headless Chromium (no API key needed)",
"env_vars": [],
"browser_provider": "local",
"browser_provider": None,
"post_setup": "browserbase", # Same npm install for agent-browser
},
{
@@ -623,11 +581,8 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
save_config(config)
def _toolset_has_keys(ts_key: str, config: dict = None) -> bool:
def _toolset_has_keys(ts_key: str) -> bool:
"""Check if a toolset's required API keys are configured."""
if config is None:
config = load_config()
if ts_key == "vision":
try:
from agent.auxiliary_client import resolve_vision_provider_client
@@ -637,16 +592,10 @@ def _toolset_has_keys(ts_key: str, config: dict = None) -> bool:
except Exception:
return False
if ts_key in {"web", "image_gen", "tts", "browser"}:
features = get_nous_subscription_features(config)
feature = features.features.get(ts_key)
if feature and (feature.available or feature.managed_by_nous):
return True
# Check TOOL_CATEGORIES first (provider-aware)
cat = TOOL_CATEGORIES.get(ts_key)
if cat:
for provider in _visible_providers(cat, config):
for provider in cat.get("providers", []):
env_vars = provider.get("env_vars", [])
if not env_vars:
return True # No-key provider (e.g. Local Browser, Edge TTS)
@@ -856,45 +805,11 @@ def _configure_toolset(ts_key: str, config: dict):
_configure_simple_requirements(ts_key)
def _visible_providers(cat: dict, config: dict) -> list[dict]:
    """Filter a category's provider entries to those usable right now.

    Providers tied to a managed Nous feature are hidden when managed
    tools are disabled, and Nous-auth-gated providers are hidden until
    the user has logged into Nous Portal.
    """
    features = get_nous_subscription_features(config)
    return [
        entry
        for entry in cat.get("providers", [])
        if (not entry.get("managed_nous_feature") or managed_nous_tools_enabled())
        and (not entry.get("requires_nous_auth") or features.nous_auth_present)
    ]
def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool:
    """Return True when enabling this toolset should open provider setup."""
    if not TOOL_CATEGORIES.get(ts_key):
        # No provider menu for this toolset: prompt only when keys are missing.
        return not _toolset_has_keys(ts_key, config)
    # Toolsets whose "configured" marker is a specific field inside a
    # config section: (section name, required field).
    marker_fields = {
        "tts": ("tts", "provider"),
        "web": ("web", "backend"),
        "browser": ("browser", "cloud_provider"),
    }
    if ts_key in marker_fields:
        section, field = marker_fields[ts_key]
        section_cfg = config.get(section, {})
        return not isinstance(section_cfg, dict) or field not in section_cfg
    if ts_key == "image_gen":
        # Image generation counts as configured once FAL_KEY is set.
        return not get_env_value("FAL_KEY")
    return not _toolset_has_keys(ts_key, config)
def _configure_tool_category(ts_key: str, cat: dict, config: dict):
"""Configure a tool category with provider selection."""
icon = cat.get("icon", "")
name = cat["name"]
providers = _visible_providers(cat, config)
providers = cat["providers"]
# Check Python version requirement
if cat.get("requires_python"):
@@ -959,27 +874,6 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
def _is_provider_active(provider: dict, config: dict) -> bool:
"""Check if a provider entry matches the currently active config."""
managed_feature = provider.get("managed_nous_feature")
if managed_feature:
features = get_nous_subscription_features(config)
feature = features.features.get(managed_feature)
if feature is None:
return False
if managed_feature == "image_gen":
return feature.managed_by_nous
if provider.get("tts_provider"):
return (
feature.managed_by_nous
and config.get("tts", {}).get("provider") == provider["tts_provider"]
)
if "browser_provider" in provider:
current = config.get("browser", {}).get("cloud_provider")
return feature.managed_by_nous and provider["browser_provider"] == current
if provider.get("web_backend"):
current = config.get("web", {}).get("backend")
return feature.managed_by_nous and current == provider["web_backend"]
return feature.managed_by_nous
if provider.get("tts_provider"):
return config.get("tts", {}).get("provider") == provider["tts_provider"]
if "browser_provider" in provider:
@@ -1006,13 +900,6 @@ def _detect_active_provider_index(providers: list, config: dict) -> int:
def _configure_provider(provider: dict, config: dict):
"""Configure a single provider - prompt for API keys and set config."""
env_vars = provider.get("env_vars", [])
managed_feature = provider.get("managed_nous_feature")
if provider.get("requires_nous_auth"):
features = get_nous_subscription_features(config)
if not features.nous_auth_present:
_print_warning(" Nous Subscription is only available after logging into Nous Portal.")
return
# Set TTS provider in config if applicable
if provider.get("tts_provider"):
@@ -1021,12 +908,11 @@ def _configure_provider(provider: dict, config: dict):
# Set browser cloud provider in config if applicable
if "browser_provider" in provider:
bp = provider["browser_provider"]
if bp == "local":
config.setdefault("browser", {})["cloud_provider"] = "local"
_print_success(" Browser set to local mode")
elif bp:
if bp:
config.setdefault("browser", {})["cloud_provider"] = bp
_print_success(f" Browser cloud provider set to: {bp}")
else:
config.get("browser", {}).pop("cloud_provider", None)
# Set web search backend in config if applicable
if provider.get("web_backend"):
@@ -1034,16 +920,7 @@ def _configure_provider(provider: dict, config: dict):
_print_success(f" Web backend set to: {provider['web_backend']}")
if not env_vars:
if provider.get("post_setup"):
_run_post_setup(provider["post_setup"])
_print_success(f" {provider['name']} - no configuration needed!")
if managed_feature:
_print_info(" Requests for this tool will be billed to your Nous subscription.")
override_envs = provider.get("override_env_vars", [])
if any(get_env_value(env_var) for env_var in override_envs):
_print_warning(
" Direct credentials are still configured and may take precedence until you remove them from ~/.hermes/.env."
)
return
# Prompt for each required env var
@@ -1151,7 +1028,7 @@ def _reconfigure_tool(config: dict):
cat = TOOL_CATEGORIES.get(ts_key)
reqs = TOOLSET_ENV_REQUIREMENTS.get(ts_key)
if cat or reqs:
if _toolset_has_keys(ts_key, config):
if _toolset_has_keys(ts_key):
configurable.append((ts_key, ts_label))
if not configurable:
@@ -1181,7 +1058,7 @@ def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict):
"""Reconfigure a tool category - provider selection + API key update."""
icon = cat.get("icon", "")
name = cat["name"]
providers = _visible_providers(cat, config)
providers = cat["providers"]
if len(providers) == 1:
provider = providers[0]
@@ -1216,13 +1093,6 @@ def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict):
def _reconfigure_provider(provider: dict, config: dict):
"""Reconfigure a provider - update API keys."""
env_vars = provider.get("env_vars", [])
managed_feature = provider.get("managed_nous_feature")
if provider.get("requires_nous_auth"):
features = get_nous_subscription_features(config)
if not features.nous_auth_present:
_print_warning(" Nous Subscription is only available after logging into Nous Portal.")
return
if provider.get("tts_provider"):
config.setdefault("tts", {})["provider"] = provider["tts_provider"]
@@ -1230,12 +1100,12 @@ def _reconfigure_provider(provider: dict, config: dict):
if "browser_provider" in provider:
bp = provider["browser_provider"]
if bp == "local":
config.setdefault("browser", {})["cloud_provider"] = "local"
_print_success(" Browser set to local mode")
elif bp:
if bp:
config.setdefault("browser", {})["cloud_provider"] = bp
_print_success(f" Browser cloud provider set to: {bp}")
else:
config.get("browser", {}).pop("cloud_provider", None)
_print_success(" Browser set to local mode")
# Set web search backend in config if applicable
if provider.get("web_backend"):
@@ -1243,16 +1113,7 @@ def _reconfigure_provider(provider: dict, config: dict):
_print_success(f" Web backend set to: {provider['web_backend']}")
if not env_vars:
if provider.get("post_setup"):
_run_post_setup(provider["post_setup"])
_print_success(f" {provider['name']} - no configuration needed!")
if managed_feature:
_print_info(" Requests for this tool will be billed to your Nous subscription.")
override_envs = provider.get("override_env_vars", [])
if any(get_env_value(env_var) for env_var in override_envs):
_print_warning(
" Direct credentials are still configured and may take precedence until you remove them from ~/.hermes/.env."
)
return
for var in env_vars:
@@ -1361,23 +1222,13 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
label = next((l for k, l, _ in _get_effective_configurable_toolsets() if k == ts), ts)
print(color(f" - {label}", Colors.RED))
auto_configured = apply_nous_managed_defaults(
config,
enabled_toolsets=new_enabled,
)
if managed_nous_tools_enabled():
for ts_key in sorted(auto_configured):
label = next((l for k, l, _ in CONFIGURABLE_TOOLSETS if k == ts_key), ts_key)
print(color(f"{label}: using your Nous subscription defaults", Colors.GREEN))
# Walk through ALL selected tools that have provider options or
# need API keys. This ensures browser (Local vs Browserbase),
# TTS (Edge vs OpenAI vs ElevenLabs), etc. are shown even when
# a free provider exists.
to_configure = [
ts_key for ts_key in sorted(new_enabled)
if (TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key))
and ts_key not in auto_configured
if TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key)
]
if to_configure:
@@ -1470,7 +1321,7 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
# Configure API keys for newly enabled tools
for ts_key in sorted(added):
if (TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key)):
if _toolset_needs_configuration_prompt(ts_key, config):
if not _toolset_has_keys(ts_key):
_configure_toolset(ts_key, config)
_save_platform_tools(config, pk, new_enabled)
save_config(config)
@@ -1510,7 +1361,7 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
# Configure newly enabled toolsets that need API keys
for ts_key in sorted(added):
if (TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key)):
if _toolset_needs_configuration_prompt(ts_key, config):
if not _toolset_has_keys(ts_key):
_configure_toolset(ts_key, config)
_save_platform_tools(config, pkey, new_enabled)

View File

@@ -1009,9 +1009,8 @@ class SessionDB:
Strategy:
- Preserve properly paired quoted phrases (``"exact phrase"``)
- Strip unmatched FTS5-special characters that would cause errors
- Wrap unquoted hyphenated and dotted terms in quotes so FTS5
matches them as exact phrases instead of splitting on the
hyphen/dot (e.g. ``chat-send``, ``P2.2``, ``my-app.config.ts``)
- Wrap unquoted hyphenated terms in quotes so FTS5 matches them
as exact phrases instead of splitting on the hyphen
"""
# Step 1: Extract balanced double-quoted phrases and protect them
# from further processing via numbered placeholders.
@@ -1036,13 +1035,11 @@ class SessionDB:
sanitized = re.sub(r"(?i)^(AND|OR|NOT)\b\s*", "", sanitized.strip())
sanitized = re.sub(r"(?i)\s+(AND|OR|NOT)\s*$", "", sanitized.strip())
# Step 5: Wrap unquoted dotted and/or hyphenated terms in double
# quotes. FTS5's tokenizer splits on dots and hyphens, turning
# ``chat-send`` into ``chat AND send`` and ``P2.2`` into ``p2 AND 2``.
# Quoting preserves phrase semantics. A single pass avoids the
# double-quoting bug that would occur if dotted and hyphenated
# patterns were applied sequentially (e.g. ``my-app.config``).
sanitized = re.sub(r"\b(\w+(?:[.-]\w+)+)\b", r'"\1"', sanitized)
# Step 5: Wrap unquoted hyphenated terms (e.g. ``chat-send``) in
# double quotes. FTS5's tokenizer splits on hyphens, turning
# ``chat-send`` into ``chat AND send``. Quoting preserves the
# intended phrase match.
sanitized = re.sub(r"\b(\w+(?:-\w+)+)\b", r'"\1"', sanitized)
# Step 6: Restore preserved quoted phrases
for i, quoted in enumerate(_quoted_parts):

View File

@@ -0,0 +1,9 @@
"""Honcho integration for AI-native memory.
This package is only active when honcho.enabled=true in config and
HONCHO_API_KEY is set. All honcho-ai imports are deferred to avoid
ImportError when the package is not installed.
Named ``honcho_integration`` (not ``honcho``) to avoid shadowing the
``honcho`` package installed by the ``honcho-ai`` SDK.
"""

View File

@@ -11,228 +11,9 @@ import sys
from pathlib import Path
from hermes_constants import get_hermes_home
from plugins.memory.honcho.client import resolve_active_host, resolve_config_path, GLOBAL_CONFIG_PATH, HOST
from honcho_integration.client import resolve_config_path, GLOBAL_CONFIG_PATH
def clone_honcho_for_profile(profile_name: str) -> bool:
    """Auto-clone Honcho config for a new profile from the default host block.

    Called during profile creation. If Honcho is configured on the default
    host, creates a new host block for the profile with inherited settings
    and auto-derived workspace/aiPeer.

    Returns True if a host block was created, False if Honcho isn't configured.
    """
    cfg = _read_config()
    if not cfg:
        # No honcho.json at all -- nothing to clone from.
        return False
    hosts = cfg.get("hosts", {})
    default_block = hosts.get(HOST, {})
    # No default host block and no root-level API key = Honcho not configured
    has_key = bool(cfg.get("apiKey") or os.environ.get("HONCHO_API_KEY"))
    if not default_block and not has_key:
        return False
    new_host = f"{HOST}.{profile_name}"
    if new_host in hosts:
        return False  # already exists
    # Clone settings from default block, override identity fields
    new_block = {}
    for key in ("memoryMode", "recallMode", "writeFrequency", "sessionStrategy",
                "sessionPeerPrefix", "contextTokens", "dialecticReasoningLevel",
                "dialecticMaxChars", "saveMessages"):
        # Only copy keys the default block explicitly sets; absent keys fall
        # back to root-level config at read time.
        val = default_block.get(key)
        if val is not None:
            new_block[key] = val
    # Inherit peer name from default
    peer_name = default_block.get("peerName") or cfg.get("peerName")
    if peer_name:
        new_block["peerName"] = peer_name
    # AI peer is profile-specific; workspace is shared so all profiles
    # see the same user context, sessions, and project history.
    # Use the bare profile name as the peer identity (not the host key)
    # because Honcho's peer ID pattern is ^[a-zA-Z0-9_-]+$ (no dots).
    new_block["aiPeer"] = profile_name
    new_block["workspace"] = default_block.get("workspace") or cfg.get("workspace") or HOST
    new_block["enabled"] = default_block.get("enabled", True)
    # Persist the new host block before attempting any network work.
    cfg.setdefault("hosts", {})[new_host] = new_block
    _write_config(cfg)
    # Eagerly create the peer in Honcho so it exists before first message
    _ensure_peer_exists(new_host)
    return True
def _ensure_peer_exists(host_key: str | None = None) -> bool:
    """Best-effort creation of the Honcho AI peer for *host_key*.

    Safe to call repeatedly: Honcho's ``peer()`` creates the peer when it
    is missing and simply returns it otherwise. Returns False when Honcho
    is disabled, unconfigured, or unreachable; True once the peer(s) exist.
    """
    try:
        from plugins.memory.honcho.client import HonchoClientConfig, get_honcho_client

        conf = HonchoClientConfig.from_global_config(host=host_key)
        usable = conf.enabled and (conf.api_key or conf.base_url)
        if not usable:
            return False
        honcho = get_honcho_client(conf)
        honcho.peer(conf.ai_peer)  # idempotent create-or-fetch
        if conf.peer_name:
            honcho.peer(conf.peer_name)
        return True
    except Exception:
        # Missing SDK / bad config / no connection -- callers treat as deferred.
        return False
def cmd_enable(args) -> None:
    """Enable Honcho for the active profile.

    Flips ``enabled`` on the active host block (creating the block if
    needed), seeding a brand-new block with settings cloned from the
    default host. Eagerly creates the AI peer when a connection exists.
    """
    cfg = _read_config()
    host = _host_key()
    # Prefix messages with the host key for non-default profiles only.
    label = f"[{host}] " if host != "hermes" else ""
    block = cfg.setdefault("hosts", {}).setdefault(host, {})
    if block.get("enabled") is True:
        print(f" {label}Honcho is already enabled.\n")
        return
    block["enabled"] = True
    # If this is a new profile host block with no settings, clone from default
    if not block.get("aiPeer"):
        default_block = cfg.get("hosts", {}).get(HOST, {})
        for key in ("memoryMode", "recallMode", "writeFrequency", "sessionStrategy",
                    "contextTokens", "dialecticReasoningLevel", "dialecticMaxChars"):
            # Copy only values the default block sets, without clobbering
            # anything already present on this block.
            val = default_block.get(key)
            if val is not None and key not in block:
                block[key] = val
        peer_name = default_block.get("peerName") or cfg.get("peerName")
        if peer_name and "peerName" not in block:
            block["peerName"] = peer_name
        # Use bare profile name as AI peer, not the host key
        ai_peer = host.split(".", 1)[1] if "." in host else host
        block.setdefault("aiPeer", ai_peer)
        block.setdefault("workspace", default_block.get("workspace") or cfg.get("workspace") or HOST)
    _write_config(cfg)
    print(f" {label}Honcho enabled.")
    # Create peer eagerly
    if _ensure_peer_exists(host):
        print(f" {label}Peer '{block.get('aiPeer', host)}' ready.")
    else:
        print(f" {label}Peer creation deferred (no connection).")
    print(f" Saved to {_config_path()}\n")
def cmd_disable(args) -> None:
    """Disable Honcho for the active profile."""
    cfg = _read_config()
    host = _host_key()
    label = "" if host == "hermes" else f"[{host}] "
    block = cfg.get("hosts", {}).get(host, {})
    # Missing block or an explicit enabled=False both mean "already off".
    already_disabled = not block or block.get("enabled") is False
    if already_disabled:
        print(f" {label}Honcho is already disabled.\n")
        return
    block["enabled"] = False
    _write_config(cfg)
    print(f" {label}Honcho disabled.")
    print(f" Saved to {_config_path()}\n")
def cmd_sync(args) -> None:
    """Sync Honcho config to all existing profiles.

    Walks every Hermes profile and clones a host block for any profile
    that lacks one, inheriting settings from the default host block.
    """
    try:
        from hermes_cli.profiles import list_profiles
        all_profiles = list_profiles()
    except Exception as e:
        print(f" Could not list profiles: {e}\n")
        return

    cfg = _read_config()
    if not cfg:
        print(" No Honcho config found. Run 'hermes honcho setup' first.\n")
        return

    default_block = cfg.get("hosts", {}).get(HOST, {})
    root_key_present = bool(cfg.get("apiKey") or os.environ.get("HONCHO_API_KEY"))
    if not default_block and not root_key_present:
        print(" Honcho not configured on default profile. Run 'hermes honcho setup' first.\n")
        return

    made = 0
    untouched = 0
    for prof in all_profiles:
        if prof.name == "default":
            # The default profile uses the base host block directly.
            continue
        if clone_honcho_for_profile(prof.name):
            print(f" + {prof.name} -> hermes.{prof.name}")
            made += 1
        else:
            untouched += 1

    if made:
        print(f"\n {made} profile(s) synced.")
    else:
        print(" All profiles already have Honcho config.")
    if untouched:
        print(f" {untouched} profile(s) already configured (skipped).")
    print()
def sync_honcho_profiles_quiet() -> int:
    """Silently sync Honcho host blocks for every profile.

    Invoked from `hermes update` -- never prints and never raises.
    Returns the number of host blocks newly created.
    """
    try:
        from hermes_cli.profiles import list_profiles
        profiles = list_profiles()
    except Exception:
        return 0

    cfg = _read_config()
    if not cfg:
        return 0

    # Configured = default host block exists, or a root/env API key is set.
    configured = bool(
        cfg.get("hosts", {}).get(HOST, {})
        or cfg.get("apiKey")
        or os.environ.get("HONCHO_API_KEY")
    )
    if not configured:
        return 0

    return sum(
        1
        for p in profiles
        if p.name != "default" and clone_honcho_for_profile(p.name)
    )
_profile_override: str | None = None
def _host_key() -> str:
    """Return the active Honcho host key, derived from the current Hermes profile."""
    override = _profile_override
    if not override:
        return resolve_active_host()
    # "default"/"custom" map onto the base host; any other override gets
    # its own dotted host block.
    return HOST if override in ("default", "custom") else f"{HOST}.{override}"
HOST = "hermes"
def _config_path() -> Path:
@@ -271,7 +52,7 @@ def _write_config(cfg: dict, path: Path | None = None) -> None:
def _resolve_api_key(cfg: dict) -> str:
"""Resolve API key with host -> root -> env fallback."""
host_key = ((cfg.get("hosts") or {}).get(_host_key()) or {}).get("apiKey")
host_key = ((cfg.get("hosts") or {}).get(HOST) or {}).get("apiKey")
return host_key or cfg.get("apiKey", "") or os.environ.get("HONCHO_API_KEY", "")
@@ -337,10 +118,10 @@ def cmd_setup(args) -> None:
if not _ensure_sdk_installed():
return
# All writes go to the active host block — root keys are managed by
# the user or the honcho CLI only.
# All writes go to hosts.hermes — root keys are managed by the user
# or the honcho CLI only.
hosts = cfg.setdefault("hosts", {})
hermes_host = hosts.setdefault(_host_key(), {})
hermes_host = hosts.setdefault(HOST, {})
# API key — shared credential, lives at root so all hosts can read it
current_key = cfg.get("apiKey", "")
@@ -367,7 +148,7 @@ def cmd_setup(args) -> None:
if new_workspace:
hermes_host["workspace"] = new_workspace
hermes_host.setdefault("aiPeer", _host_key())
hermes_host.setdefault("aiPeer", HOST)
# Memory mode
current_mode = hermes_host.get("memoryMode") or cfg.get("memoryMode", "hybrid")
@@ -424,9 +205,9 @@ def cmd_setup(args) -> None:
# Test connection
print(" Testing connection... ", end="", flush=True)
try:
from plugins.memory.honcho.client import HonchoClientConfig, get_honcho_client, reset_honcho_client
from honcho_integration.client import HonchoClientConfig, get_honcho_client, reset_honcho_client
reset_honcho_client()
hcfg = HonchoClientConfig.from_global_config(host=_host_key())
hcfg = HonchoClientConfig.from_global_config()
get_honcho_client(hcfg)
print("OK")
except Exception as e:
@@ -456,53 +237,8 @@ def cmd_setup(args) -> None:
print(" hermes honcho map <name> — map this directory to a session name\n")
def _active_profile_name() -> str:
    """Return the active Hermes profile name (respects --target-profile override)."""
    override = _profile_override
    if override:
        return override
    try:
        from hermes_cli.profiles import get_active_profile_name
        return get_active_profile_name()
    except Exception:
        # Profiles subsystem unavailable -- fall back to the default profile.
        return "default"
def _all_profile_host_configs() -> list[tuple[str, str, dict]]:
    """Return (profile_name, host_key, host_block) for every known profile.

    honcho.json is read a single time; each profile is then mapped onto
    its host block (empty dict when no block exists yet).
    """
    try:
        from hermes_cli.profiles import list_profiles
        known = list_profiles()
    except Exception:
        # Can't enumerate profiles -- report just the active one.
        return [(_active_profile_name(), _host_key(), {})]

    hosts = _read_config().get("hosts", {})
    # Default profile first, then every other profile under its dotted host key.
    rows: list[tuple[str, str, dict]] = [("default", HOST, hosts.get(HOST, {}))]
    rows.extend(
        (p.name, f"{HOST}.{p.name}", hosts.get(f"{HOST}.{p.name}", {}))
        for p in known
        if p.name != "default"
    )
    return rows
def cmd_status(args) -> None:
"""Show current Honcho config and connection status."""
show_all = getattr(args, "all", False)
if show_all:
_cmd_status_all()
return
try:
import honcho # noqa: F401
except ImportError:
@@ -520,8 +256,8 @@ def cmd_status(args) -> None:
return
try:
from plugins.memory.honcho.client import HonchoClientConfig, get_honcho_client
hcfg = HonchoClientConfig.from_global_config(host=_host_key())
from honcho_integration.client import HonchoClientConfig, get_honcho_client
hcfg = HonchoClientConfig.from_global_config()
except Exception as e:
print(f" Config error: {e}\n")
return
@@ -529,16 +265,11 @@ def cmd_status(args) -> None:
api_key = hcfg.api_key or ""
masked = f"...{api_key[-8:]}" if len(api_key) > 8 else ("set" if api_key else "not set")
profile = _active_profile_name()
profile_label = f" [{hcfg.host}]" if profile != "default" else ""
print(f"\nHoncho status{profile_label}\n" + "" * 40)
if profile != "default":
print(f" Profile: {profile}")
print(f" Host: {hcfg.host}")
print("\nHoncho status\n" + "" * 40)
print(f" Enabled: {hcfg.enabled}")
print(f" API key: {masked}")
print(f" Workspace: {hcfg.workspace_id}")
print(f" Host: {hcfg.host}")
print(f" Config path: {active_path}")
if write_path != active_path:
print(f" Write path: {write_path} (instance-local)")
@@ -556,9 +287,8 @@ def cmd_status(args) -> None:
if hcfg.enabled and (hcfg.api_key or hcfg.base_url):
print("\n Connection... ", end="", flush=True)
try:
client = get_honcho_client(hcfg)
print("OK")
_show_peer_cards(hcfg, client)
get_honcho_client(hcfg)
print("OK\n")
except Exception as e:
print(f"FAILED ({e})\n")
else:
@@ -566,90 +296,6 @@ def cmd_status(args) -> None:
print(f"\n Not connected ({reason})\n")
def _show_peer_cards(hcfg, client) -> None:
    """Fetch and display peer cards for the active profile.

    Uses get_or_create to ensure the session exists with peers configured.
    This is idempotent -- if the session already exists on the server it's
    just retrieved, not duplicated.

    Any failure (network, missing session manager, malformed response) is
    reported as a single "Peer data unavailable" line rather than raised.
    """
    try:
        from plugins.memory.honcho.session import HonchoSessionManager
        mgr = HonchoSessionManager(honcho=client, config=hcfg)
        session_key = hcfg.resolve_session_name()
        mgr.get_or_create(session_key)
        # User peer card
        card = mgr.get_peer_card(session_key)
        if card:
            # Show at most 10 facts to keep the status output compact.
            print(f"\n User peer card ({len(card)} facts):")
            for fact in card[:10]:
                print(f" - {fact}")
            if len(card) > 10:
                print(f" ... and {len(card) - 10} more")
        # AI peer representation
        ai_rep = mgr.get_ai_representation(session_key)
        ai_text = ai_rep.get("representation", "")
        if ai_text:
            # Truncate to first 200 chars
            display = ai_text[:200] + ("..." if len(ai_text) > 200 else "")
            print(f"\n AI peer representation:")
            print(f" {display}")
        if not card and not ai_text:
            print("\n No peer data yet (accumulates after first conversation)")
        print()
    except Exception as e:
        print(f"\n Peer data unavailable: {e}\n")
def _cmd_status_all() -> None:
    """Show Honcho config overview across all profiles."""
    rows = _all_profile_host_configs()
    cfg = _read_config()
    active = _active_profile_name()

    print(f"\nHoncho profiles ({len(rows)})\n" + "" * 60)
    print(f" {'Profile':<14} {'Host':<22} {'Enabled':<9} {'Mode':<9} {'Recall':<9} {'Write'}")
    print(f" {'' * 14} {'' * 22} {'' * 9} {'' * 9} {'' * 9} {'' * 9}")

    for name, host, block in rows:
        # Per-host override wins; otherwise fall back to the top-level value.
        enabled = block.get("enabled", cfg.get("enabled"))
        if enabled is None:
            # Auto-enable check: configured hosts count as "on" when any
            # credential source exists; unconfigured hosts default to off.
            has_creds = bool(cfg.get("apiKey") or os.environ.get("HONCHO_API_KEY"))
            enabled = has_creds if block else False
        flag = "yes" if enabled else "no"
        mode = block.get("memoryMode") or cfg.get("memoryMode", "hybrid")
        recall = block.get("recallMode") or cfg.get("recallMode", "hybrid")
        write = block.get("writeFrequency") or cfg.get("writeFrequency", "async")
        suffix = " *" if name == active else ""
        print(f" {name + suffix:<14} {host:<22} {flag:<9} {mode:<9} {recall:<9} {write}")
    print(f"\n * active profile\n")
def cmd_peers(args) -> None:
    """Show peer identities across all profiles."""
    rows = _all_profile_host_configs()
    cfg = _read_config()

    print(f"\nHoncho peer identities ({len(rows)} profiles)\n" + "" * 50)
    print(f" {'Profile':<14} {'User peer':<16} {'AI peer'}")
    print(f" {'' * 14} {'' * 16} {'' * 18}")

    for profile_name, host, block in rows:
        # Host-level settings win; fall back to global config, then a default.
        user_peer = block.get("peerName") or cfg.get("peerName") or "(not set)"
        ai_peer = block.get("aiPeer") or cfg.get("aiPeer") or host
        print(f" {profile_name:<14} {user_peer:<16} {ai_peer}")
    print()
def cmd_sessions(args) -> None:
"""List known directory → session name mappings."""
cfg = _read_config()
@@ -708,9 +354,9 @@ def cmd_peer(args) -> None:
if user_name is None and ai_name is None and reasoning is None:
# Show current values
hosts = cfg.get("hosts", {})
hermes = hosts.get(_host_key(), {})
hermes = hosts.get(HOST, {})
user = hermes.get('peerName') or cfg.get('peerName') or '(not set)'
ai = hermes.get('aiPeer') or cfg.get('aiPeer') or _host_key()
ai = hermes.get('aiPeer') or cfg.get('aiPeer') or HOST
lvl = hermes.get("dialecticReasoningLevel") or cfg.get("dialecticReasoningLevel") or "low"
max_chars = hermes.get("dialecticMaxChars") or cfg.get("dialecticMaxChars") or 600
print("\nHoncho peers\n" + "" * 40)
@@ -724,26 +370,23 @@ def cmd_peer(args) -> None:
print(f" Dialectic cap: {max_chars} chars\n")
return
host = _host_key()
label = f"[{host}] " if host != "hermes" else ""
if user_name is not None:
cfg.setdefault("hosts", {}).setdefault(host, {})["peerName"] = user_name.strip()
cfg.setdefault("hosts", {}).setdefault(HOST, {})["peerName"] = user_name.strip()
changed = True
print(f" {label}User peer -> {user_name.strip()}")
print(f" User peer {user_name.strip()}")
if ai_name is not None:
cfg.setdefault("hosts", {}).setdefault(host, {})["aiPeer"] = ai_name.strip()
cfg.setdefault("hosts", {}).setdefault(HOST, {})["aiPeer"] = ai_name.strip()
changed = True
print(f" {label}AI peer -> {ai_name.strip()}")
print(f" AI peer {ai_name.strip()}")
if reasoning is not None:
if reasoning not in REASONING_LEVELS:
print(f" Invalid reasoning level '{reasoning}'. Options: {', '.join(REASONING_LEVELS)}")
return
cfg.setdefault("hosts", {}).setdefault(host, {})["dialecticReasoningLevel"] = reasoning
cfg.setdefault("hosts", {}).setdefault(HOST, {})["dialecticReasoningLevel"] = reasoning
changed = True
print(f" {label}Dialectic reasoning level -> {reasoning}")
print(f" Dialectic reasoning level {reasoning}")
if changed:
_write_config(cfg)
@@ -761,7 +404,7 @@ def cmd_mode(args) -> None:
if mode_arg is None:
current = (
(cfg.get("hosts") or {}).get(_host_key(), {}).get("memoryMode")
(cfg.get("hosts") or {}).get(HOST, {}).get("memoryMode")
or cfg.get("memoryMode")
or "hybrid"
)
@@ -776,18 +419,16 @@ def cmd_mode(args) -> None:
print(f" Invalid mode '{mode_arg}'. Options: {', '.join(MODES)}\n")
return
host = _host_key()
label = f"[{host}] " if host != "hermes" else ""
cfg.setdefault("hosts", {}).setdefault(host, {})["memoryMode"] = mode_arg
cfg.setdefault("hosts", {}).setdefault(HOST, {})["memoryMode"] = mode_arg
_write_config(cfg)
print(f" {label}Memory mode -> {mode_arg} ({MODES[mode_arg]})\n")
print(f" Memory mode {mode_arg} ({MODES[mode_arg]})\n")
def cmd_tokens(args) -> None:
"""Show or set token budget settings."""
cfg = _read_config()
hosts = cfg.get("hosts", {})
hermes = hosts.get(_host_key(), {})
hermes = hosts.get(HOST, {})
context = getattr(args, "context", None)
dialectic = getattr(args, "dialectic", None)
@@ -810,16 +451,14 @@ def cmd_tokens(args) -> None:
print("\n Set with: hermes honcho tokens [--context N] [--dialectic N]\n")
return
host = _host_key()
label = f"[{host}] " if host != "hermes" else ""
changed = False
if context is not None:
cfg.setdefault("hosts", {}).setdefault(host, {})["contextTokens"] = context
print(f" {label}context tokens -> {context}")
cfg.setdefault("hosts", {}).setdefault(HOST, {})["contextTokens"] = context
print(f" context tokens {context}")
changed = True
if dialectic is not None:
cfg.setdefault("hosts", {}).setdefault(host, {})["dialecticMaxChars"] = dialectic
print(f" {label}dialectic cap -> {dialectic} chars")
cfg.setdefault("hosts", {}).setdefault(HOST, {})["dialecticMaxChars"] = dialectic
print(f" dialectic cap {dialectic} chars")
changed = True
if changed:
@@ -838,9 +477,9 @@ def cmd_identity(args) -> None:
show = getattr(args, "show", False)
try:
from plugins.memory.honcho.client import HonchoClientConfig, get_honcho_client
from plugins.memory.honcho.session import HonchoSessionManager
hcfg = HonchoClientConfig.from_global_config(host=_host_key())
from honcho_integration.client import HonchoClientConfig, get_honcho_client
from honcho_integration.session import HonchoSessionManager
hcfg = HonchoClientConfig.from_global_config()
client = get_honcho_client(hcfg)
mgr = HonchoSessionManager(honcho=client, config=hcfg)
session_key = hcfg.resolve_session_name()
@@ -1003,12 +642,12 @@ def cmd_migrate(args) -> None:
answer = _prompt(" Upload user memory files to Honcho now?", default="y")
if answer.lower() in ("y", "yes"):
try:
from plugins.memory.honcho.client import (
from honcho_integration.client import (
HonchoClientConfig,
get_honcho_client,
reset_honcho_client,
)
from plugins.memory.honcho.session import HonchoSessionManager
from honcho_integration.session import HonchoSessionManager
reset_honcho_client()
hcfg = HonchoClientConfig.from_global_config()
@@ -1053,12 +692,12 @@ def cmd_migrate(args) -> None:
answer = _prompt(" Seed AI identity from all detected files now?", default="y")
if answer.lower() in ("y", "yes"):
try:
from plugins.memory.honcho.client import (
from honcho_integration.client import (
HonchoClientConfig,
get_honcho_client,
reset_honcho_client,
)
from plugins.memory.honcho.session import HonchoSessionManager
from honcho_integration.session import HonchoSessionManager
reset_honcho_client()
hcfg = HonchoClientConfig.from_global_config()
@@ -1131,16 +770,11 @@ def cmd_migrate(args) -> None:
def honcho_command(args) -> None:
"""Route honcho subcommands."""
global _profile_override
_profile_override = getattr(args, "target_profile", None)
sub = getattr(args, "honcho_command", None)
if sub == "setup" or sub is None:
cmd_setup(args)
elif sub == "status":
cmd_status(args)
elif sub == "peers":
cmd_peers(args)
elif sub == "sessions":
cmd_sessions(args)
elif sub == "map":
@@ -1155,12 +789,6 @@ def honcho_command(args) -> None:
cmd_identity(args)
elif sub == "migrate":
cmd_migrate(args)
elif sub == "enable":
cmd_enable(args)
elif sub == "disable":
cmd_disable(args)
elif sub == "sync":
cmd_sync(args)
else:
print(f" Unknown honcho command: {sub}")
print(" Available: setup, status, sessions, map, peer, mode, tokens, identity, migrate, enable, disable, sync\n")
print(" Available: setup, status, sessions, map, peer, mode, tokens, identity, migrate\n")

View File

@@ -31,47 +31,16 @@ GLOBAL_CONFIG_PATH = Path.home() / ".honcho" / "config.json"
HOST = "hermes"
def resolve_active_host() -> str:
    """Derive the Honcho host key from the active Hermes profile.

    Resolution order:
      1. HERMES_HONCHO_HOST env var (explicit override)
      2. Active profile name via the profiles system -> ``hermes.<profile>``
      3. Fallback: ``"hermes"`` (default profile)
    """
    override = os.environ.get("HERMES_HONCHO_HOST", "").strip()
    if override:
        return override
    try:
        from hermes_cli.profiles import get_active_profile_name
        name = get_active_profile_name()
    except Exception:
        # Profiles system unavailable -- fall back to the default host key.
        return HOST
    if name and name not in ("default", "custom"):
        return f"{HOST}.{name}"
    return HOST
def resolve_config_path() -> Path:
"""Return the active Honcho config path.
Resolution order:
1. $HERMES_HOME/honcho.json (profile-local, if it exists)
2. ~/.hermes/honcho.json (default profile shared host blocks live here)
3. ~/.honcho/config.json (global, cross-app interop)
Returns the global path if none exist (for first-time setup writes).
Checks $HERMES_HOME/honcho.json first (instance-local), then falls back
to ~/.honcho/config.json (global). Returns the global path if neither
exists (for first-time setup writes).
"""
local_path = get_hermes_home() / "honcho.json"
if local_path.exists():
return local_path
# Default profile's config — host blocks accumulate here via setup/clone
default_path = Path.home() / ".hermes" / "honcho.json"
if default_path != local_path and default_path.exists():
return default_path
return GLOBAL_CONFIG_PATH
@@ -166,49 +135,40 @@ class HonchoClientConfig:
explicitly_configured: bool = False
@classmethod
def from_env(
cls,
workspace_id: str = "hermes",
host: str | None = None,
) -> HonchoClientConfig:
def from_env(cls, workspace_id: str = "hermes") -> HonchoClientConfig:
"""Create config from environment variables (fallback)."""
resolved_host = host or resolve_active_host()
api_key = os.environ.get("HONCHO_API_KEY")
base_url = os.environ.get("HONCHO_BASE_URL", "").strip() or None
return cls(
host=resolved_host,
workspace_id=workspace_id,
api_key=api_key,
environment=os.environ.get("HONCHO_ENVIRONMENT", "production"),
base_url=base_url,
ai_peer=resolved_host,
enabled=bool(api_key or base_url),
)
@classmethod
def from_global_config(
cls,
host: str | None = None,
host: str = HOST,
config_path: Path | None = None,
) -> HonchoClientConfig:
"""Create config from the resolved Honcho config path.
Resolution: $HERMES_HOME/honcho.json -> ~/.honcho/config.json -> env vars.
When host is None, derives it from the active Hermes profile.
"""
resolved_host = host or resolve_active_host()
path = config_path or resolve_config_path()
if not path.exists():
logger.debug("No global Honcho config at %s, falling back to env", path)
return cls.from_env(host=resolved_host)
return cls.from_env()
try:
raw = json.loads(path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError) as e:
logger.warning("Failed to read %s: %s, falling back to env", path, e)
return cls.from_env(host=resolved_host)
return cls.from_env()
host_block = (raw.get("hosts") or {}).get(resolved_host, {})
host_block = (raw.get("hosts") or {}).get(host, {})
# A hosts.hermes block or explicit enabled flag means the user
# intentionally configured Honcho for this host.
_explicitly_configured = bool(host_block) or raw.get("enabled") is True
@@ -217,12 +177,12 @@ class HonchoClientConfig:
workspace = (
host_block.get("workspace")
or raw.get("workspace")
or resolved_host
or host
)
ai_peer = (
host_block.get("aiPeer")
or raw.get("aiPeer")
or resolved_host
or host
)
linked_hosts = host_block.get("linkedHosts", [])
@@ -282,7 +242,7 @@ class HonchoClientConfig:
)
return cls(
host=resolved_host,
host=host,
workspace_id=workspace,
api_key=api_key,
environment=environment,

View File

@@ -10,7 +10,7 @@ from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, TYPE_CHECKING
from plugins.memory.honcho.client import get_honcho_client
from honcho_integration.client import get_honcho_client
if TYPE_CHECKING:
from honcho import Honcho
@@ -162,17 +162,11 @@ class HonchoSessionManager:
# Configure peer observation settings.
# observe_me=True for AI peer so Honcho watches what the agent says
# and builds its representation over time — enabling identity formation.
try:
from honcho.session import SessionPeerConfig
user_config = SessionPeerConfig(observe_me=True, observe_others=True)
ai_config = SessionPeerConfig(observe_me=True, observe_others=True)
from honcho.session import SessionPeerConfig
user_config = SessionPeerConfig(observe_me=True, observe_others=True)
ai_config = SessionPeerConfig(observe_me=True, observe_others=True)
session.add_peers([(user_peer, user_config), (assistant_peer, ai_config)])
except Exception as e:
logger.warning(
"Honcho session '%s' add_peers failed (non-fatal): %s",
session_id, e,
)
session.add_peers([(user_peer, user_config), (assistant_peer, ai_config)])
# Load existing messages via context() - single call for messages + metadata
existing_messages = []
@@ -237,7 +231,7 @@ class HonchoSessionManager:
chat_id = parts[1] if len(parts) > 1 else key
user_peer_id = self._sanitize_id(f"user-{channel}-{chat_id}")
assistant_peer_id = self._sanitize_id(
assistant_peer_id = (
self._config.ai_peer if self._config else "hermes-assistant"
)

View File

@@ -156,7 +156,7 @@ def _discover_tools():
"tools.delegate_tool",
"tools.process_registry",
"tools.send_message_tool",
# "tools.honcho_tools", # Removed — Honcho is now a memory provider plugin
"tools.honcho_tools",
"tools.homeassistant_tool",
]
import importlib
@@ -371,6 +371,8 @@ def handle_function_call(
task_id: Optional[str] = None,
user_task: Optional[str] = None,
enabled_tools: Optional[List[str]] = None,
honcho_manager: Optional[Any] = None,
honcho_session_key: Optional[str] = None,
) -> str:
"""
Main function call dispatcher that routes calls to the tool registry.
@@ -415,12 +417,16 @@ def handle_function_call(
function_name, function_args,
task_id=task_id,
enabled_tools=sandbox_enabled,
honcho_manager=honcho_manager,
honcho_session_key=honcho_session_key,
)
else:
result = registry.dispatch(
function_name, function_args,
task_id=task_id,
user_task=user_task,
honcho_manager=honcho_manager,
honcho_session_key=honcho_session_key,
)
try:

View File

@@ -1 +0,0 @@
# Hermes plugins package

View File

@@ -1,213 +0,0 @@
"""Memory provider plugin discovery.
Scans ``plugins/memory/<name>/`` directories for memory provider plugins.
Each subdirectory must contain ``__init__.py`` with a class implementing
the MemoryProvider ABC.
Memory providers are separate from the general plugin system — they live
in the repo and are always available without user installation. Only ONE
can be active at a time, selected via ``memory.provider`` in config.yaml.
Usage:
from plugins.memory import discover_memory_providers, load_memory_provider
available = discover_memory_providers() # [(name, desc, available), ...]
provider = load_memory_provider("openviking") # MemoryProvider instance
"""
from __future__ import annotations
import importlib
import importlib.util
import logging
import sys
from pathlib import Path
from typing import List, Optional, Tuple
logger = logging.getLogger(__name__)
_MEMORY_PLUGINS_DIR = Path(__file__).parent
def discover_memory_providers() -> List[Tuple[str, str, bool]]:
    """Scan plugins/memory/ for available providers.

    Returns a list of ``(name, description, is_available)`` tuples.
    Descriptions are read from each provider's plugin.yaml when present;
    availability is determined by actually loading the provider and
    calling its ``is_available()`` method (load failures count as
    unavailable).
    """
    found: List[Tuple[str, str, bool]] = []
    if not _MEMORY_PLUGINS_DIR.is_dir():
        return found

    for entry in sorted(_MEMORY_PLUGINS_DIR.iterdir()):
        # Only packages (dirs with __init__.py) that are not private/hidden.
        if not entry.is_dir() or entry.name.startswith(("_", ".")):
            continue
        if not (entry / "__init__.py").exists():
            continue

        # Best-effort description from plugin.yaml.
        description = ""
        meta_file = entry / "plugin.yaml"
        if meta_file.exists():
            try:
                import yaml
                with open(meta_file) as fh:
                    description = (yaml.safe_load(fh) or {}).get("description", "")
            except Exception:
                pass

        # Availability: the provider must load and report is_available().
        try:
            provider = _load_provider_from_dir(entry)
            available = provider.is_available() if provider else False
        except Exception:
            available = False

        found.append((entry.name, description, available))
    return found
def load_memory_provider(name: str) -> Optional["MemoryProvider"]:
    """Load and return a MemoryProvider instance by name.

    Returns None if the provider is not found or fails to load.
    """
    plugin_dir = _MEMORY_PLUGINS_DIR / name
    if not plugin_dir.is_dir():
        logger.debug("Memory provider '%s' not found in %s", name, _MEMORY_PLUGINS_DIR)
        return None

    try:
        instance = _load_provider_from_dir(plugin_dir)
        if not instance:
            logger.warning("Memory provider '%s' loaded but no provider instance found", name)
            return None
        return instance
    except Exception as e:
        logger.warning("Failed to load memory provider '%s': %s", name, e)
        return None
def _load_provider_from_dir(provider_dir: Path) -> Optional["MemoryProvider"]:
    """Import a provider module and extract the MemoryProvider instance.

    The module must have either:
    - A register(ctx) function (plugin-style) — we simulate a ctx
    - A top-level class that extends MemoryProvider — we instantiate it

    Returns None when the directory has no __init__.py, the module fails
    to import, or no provider can be extracted from it.
    """
    name = provider_dir.name
    # Providers are registered under the plugins.memory.* namespace so
    # in-package relative imports resolve.
    module_name = f"plugins.memory.{name}"
    init_file = provider_dir / "__init__.py"
    if not init_file.exists():
        return None
    # Check if already loaded — reuse the cached module instead of
    # executing it a second time.
    if module_name in sys.modules:
        mod = sys.modules[module_name]
    else:
        # Handle relative imports within the plugin.
        # First ensure the parent packages are registered in sys.modules,
        # otherwise "plugins.memory.<name>" cannot be imported as a package.
        for parent in ("plugins", "plugins.memory"):
            if parent not in sys.modules:
                parent_path = Path(__file__).parent
                if parent == "plugins":
                    parent_path = parent_path.parent
                parent_init = parent_path / "__init__.py"
                if parent_init.exists():
                    spec = importlib.util.spec_from_file_location(
                        parent, str(parent_init),
                        submodule_search_locations=[str(parent_path)]
                    )
                    if spec:
                        parent_mod = importlib.util.module_from_spec(spec)
                        sys.modules[parent] = parent_mod
                        try:
                            spec.loader.exec_module(parent_mod)
                        except Exception:
                            # Parent package body failing is tolerated; the
                            # provider module is loaded from an explicit path.
                            pass
        # Now load the provider module itself.
        spec = importlib.util.spec_from_file_location(
            module_name, str(init_file),
            submodule_search_locations=[str(provider_dir)]
        )
        if not spec:
            return None
        mod = importlib.util.module_from_spec(spec)
        # Register in sys.modules BEFORE exec so the module can import itself.
        sys.modules[module_name] = mod
        # Register submodules so relative imports work
        # e.g., "from .store import MemoryStore" in holographic plugin
        for sub_file in provider_dir.glob("*.py"):
            if sub_file.name == "__init__.py":
                continue
            sub_name = sub_file.stem
            full_sub_name = f"{module_name}.{sub_name}"
            if full_sub_name not in sys.modules:
                sub_spec = importlib.util.spec_from_file_location(
                    full_sub_name, str(sub_file)
                )
                if sub_spec:
                    sub_mod = importlib.util.module_from_spec(sub_spec)
                    sys.modules[full_sub_name] = sub_mod
                    try:
                        sub_spec.loader.exec_module(sub_mod)
                    except Exception as e:
                        logger.debug("Failed to load submodule %s: %s", full_sub_name, e)
        try:
            spec.loader.exec_module(mod)
        except Exception as e:
            # Remove the half-initialized module so a later retry starts clean.
            logger.debug("Failed to exec_module %s: %s", module_name, e)
            sys.modules.pop(module_name, None)
            return None
    # Try register(ctx) pattern first (how our plugins are written):
    # a collector context captures the register_memory_provider() call.
    if hasattr(mod, "register"):
        collector = _ProviderCollector()
        try:
            mod.register(collector)
            if collector.provider:
                return collector.provider
        except Exception as e:
            logger.debug("register() failed for %s: %s", name, e)
    # Fallback: find a MemoryProvider subclass and instantiate it.
    from agent.memory_provider import MemoryProvider
    for attr_name in dir(mod):
        attr = getattr(mod, attr_name, None)
        if (isinstance(attr, type) and issubclass(attr, MemoryProvider)
                and attr is not MemoryProvider):
            try:
                return attr()
            except Exception:
                # Constructor failed — keep scanning for another candidate.
                pass
    return None
class _ProviderCollector:
"""Fake plugin context that captures register_memory_provider calls."""
def __init__(self):
self.provider = None
def register_memory_provider(self, provider):
self.provider = provider
# No-op for other registration methods
def register_tool(self, *args, **kwargs):
pass
def register_hook(self, *args, **kwargs):
pass

View File

@@ -1,41 +0,0 @@
# ByteRover Memory Provider
Persistent memory via the `brv` CLI — hierarchical knowledge tree with tiered retrieval (fuzzy text → LLM-driven search).
## Requirements
Install the ByteRover CLI:
```bash
curl -fsSL https://byterover.dev/install.sh | sh
# or
npm install -g byterover-cli
```
## Setup
```bash
hermes memory setup # select "byterover"
```
Or manually:
```bash
hermes config set memory.provider byterover
# Optional cloud sync:
echo "BRV_API_KEY=your-key" >> ~/.hermes/.env
```
## Config
| Env Var | Required | Description |
|---------|----------|-------------|
| `BRV_API_KEY` | No | Cloud sync key (optional, local-first by default) |
Working directory: `$HERMES_HOME/byterover/` (profile-scoped).
## Tools
| Tool | Description |
|------|-------------|
| `brv_query` | Search the knowledge tree |
| `brv_curate` | Store facts, decisions, patterns |
| `brv_status` | CLI version, tree stats, sync state |

View File

@@ -1,398 +0,0 @@
"""ByteRover memory plugin — MemoryProvider interface.
Persistent memory via the ByteRover CLI (``brv``). Organizes knowledge into
a hierarchical context tree with tiered retrieval (fuzzy text → LLM-driven
search). Local-first with optional cloud sync.
Original PR #3499 by hieuntg81, adapted to MemoryProvider ABC.
Requires: ``brv`` CLI installed (npm install -g byterover-cli or
curl -fsSL https://byterover.dev/install.sh | sh).
Config via environment variables (profile-scoped via each profile's .env):
BRV_API_KEY — ByteRover API key (for cloud features, optional for local)
Working directory: $HERMES_HOME/byterover/ (profile-scoped context tree)
"""
from __future__ import annotations
import json
import logging
import os
import shutil
import subprocess
import threading
import time
from pathlib import Path
from typing import Any, Dict, List, Optional
from agent.memory_provider import MemoryProvider
logger = logging.getLogger(__name__)
# Timeouts
_QUERY_TIMEOUT = 30 # brv query — should be fast
_CURATE_TIMEOUT = 120 # brv curate — may involve LLM processing
# Minimum lengths to filter noise
_MIN_QUERY_LEN = 10
_MIN_OUTPUT_LEN = 20
# ---------------------------------------------------------------------------
# brv binary resolution (cached, thread-safe)
# ---------------------------------------------------------------------------
_brv_path_lock = threading.Lock()
_cached_brv_path: Optional[str] = None
def _resolve_brv_path() -> Optional[str]:
    """Find the brv binary on PATH or well-known install locations.

    Result is cached in the module global ``_cached_brv_path`` using a
    double-checked locking pattern: ``None`` means "not yet resolved",
    the empty string "" is the sentinel for "searched and not found"
    (so a missing binary is not re-searched on every call).
    """
    global _cached_brv_path
    # Fast path: return the cached result if a previous call resolved it.
    with _brv_path_lock:
        if _cached_brv_path is not None:
            return _cached_brv_path if _cached_brv_path != "" else None
    # Slow path: search outside the lock so the filesystem probes don't
    # block other threads.
    found = shutil.which("brv")
    if not found:
        home = Path.home()
        # Well-known install locations for the curl and npm installers.
        candidates = [
            home / ".brv-cli" / "bin" / "brv",
            Path("/usr/local/bin/brv"),
            home / ".npm-global" / "bin" / "brv",
        ]
        for c in candidates:
            if c.exists():
                found = str(c)
                break
    # Second check: another thread may have populated the cache while we
    # were searching; prefer its value for consistency.
    with _brv_path_lock:
        if _cached_brv_path is not None:
            return _cached_brv_path if _cached_brv_path != "" else None
        _cached_brv_path = found or ""
    return found
def _run_brv(args: List[str], timeout: int = _QUERY_TIMEOUT,
             cwd: Optional[str] = None) -> dict:
    """Run a brv CLI command.

    Args:
        args: CLI arguments appended after the brv binary path.
        timeout: seconds before the subprocess is killed.
        cwd: working directory; defaults to the profile-scoped brv dir.

    Returns a dict: {"success": True, "output": str} on exit code 0,
    otherwise {"success": False, "error": str}. Never raises.
    """
    brv_path = _resolve_brv_path()
    if not brv_path:
        return {"success": False, "error": "brv CLI not found. Install: npm install -g byterover-cli"}
    cmd = [brv_path] + args
    effective_cwd = cwd or str(_get_brv_cwd())
    # brv needs its working directory to exist (context tree lives there).
    Path(effective_cwd).mkdir(parents=True, exist_ok=True)
    env = os.environ.copy()
    # Prepend the binary's own directory so brv can find sibling tools.
    brv_bin_dir = str(Path(brv_path).parent)
    env["PATH"] = brv_bin_dir + os.pathsep + env.get("PATH", "")
    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True,
            timeout=timeout, cwd=effective_cwd, env=env,
        )
        stdout = result.stdout.strip()
        stderr = result.stderr.strip()
        if result.returncode == 0:
            return {"success": True, "output": stdout}
        return {"success": False, "error": stderr or stdout or f"brv exited {result.returncode}"}
    except subprocess.TimeoutExpired:
        return {"success": False, "error": f"brv timed out after {timeout}s"}
    except FileNotFoundError:
        # The cached path went stale (binary removed/moved). Reset the
        # cache to None so the next call re-runs the search.
        global _cached_brv_path
        with _brv_path_lock:
            _cached_brv_path = None
        return {"success": False, "error": "brv CLI not found"}
    except Exception as e:
        return {"success": False, "error": str(e)}
def _get_brv_cwd() -> Path:
    """Profile-scoped working directory for the brv context tree."""
    # Imported lazily — presumably to avoid a module-import cycle with
    # hermes_constants; TODO confirm.
    from hermes_constants import get_hermes_home
    return get_hermes_home() / "byterover"
# ---------------------------------------------------------------------------
# Tool schemas
# ---------------------------------------------------------------------------
QUERY_SCHEMA = {
"name": "brv_query",
"description": (
"Search ByteRover's persistent knowledge tree for relevant context. "
"Returns memories, project knowledge, architectural decisions, and "
"patterns from previous sessions. Use for any question where past "
"context would help."
),
"parameters": {
"type": "object",
"properties": {
"query": {"type": "string", "description": "What to search for."},
},
"required": ["query"],
},
}
CURATE_SCHEMA = {
"name": "brv_curate",
"description": (
"Store important information in ByteRover's persistent knowledge tree. "
"Use for architectural decisions, bug fixes, user preferences, project "
"patterns — anything worth remembering across sessions. ByteRover's LLM "
"automatically categorizes and organizes the memory."
),
"parameters": {
"type": "object",
"properties": {
"content": {"type": "string", "description": "The information to remember."},
},
"required": ["content"],
},
}
STATUS_SCHEMA = {
"name": "brv_status",
"description": "Check ByteRover status — CLI version, context tree stats, cloud sync state.",
"parameters": {"type": "object", "properties": {}, "required": []},
}
# ---------------------------------------------------------------------------
# MemoryProvider implementation
# ---------------------------------------------------------------------------
class ByteRoverMemoryProvider(MemoryProvider):
"""ByteRover persistent memory via the brv CLI."""
def __init__(self):
self._cwd = ""
self._session_id = ""
self._turn_count = 0
self._prefetch_result = ""
self._prefetch_lock = threading.Lock()
self._prefetch_thread: Optional[threading.Thread] = None
self._sync_thread: Optional[threading.Thread] = None
@property
def name(self) -> str:
return "byterover"
def is_available(self) -> bool:
"""Check if brv CLI is installed. No network calls."""
return _resolve_brv_path() is not None
def get_config_schema(self):
return [
{
"key": "api_key",
"description": "ByteRover API key (optional, for cloud sync)",
"secret": True,
"env_var": "BRV_API_KEY",
"url": "https://app.byterover.dev",
},
]
def initialize(self, session_id: str, **kwargs) -> None:
self._cwd = str(_get_brv_cwd())
self._session_id = session_id
self._turn_count = 0
Path(self._cwd).mkdir(parents=True, exist_ok=True)
def system_prompt_block(self) -> str:
if not _resolve_brv_path():
return ""
return (
"# ByteRover Memory\n"
"Active. Persistent knowledge tree with hierarchical context.\n"
"Use brv_query to search past knowledge, brv_curate to store "
"important facts, brv_status to check state."
)
def prefetch(self, query: str, *, session_id: str = "") -> str:
if self._prefetch_thread and self._prefetch_thread.is_alive():
self._prefetch_thread.join(timeout=3.0)
with self._prefetch_lock:
result = self._prefetch_result
self._prefetch_result = ""
if not result:
return ""
return f"## ByteRover Context\n{result}"
def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
if not query or len(query.strip()) < _MIN_QUERY_LEN:
return
def _run():
try:
result = _run_brv(
["query", "--", query.strip()[:5000]],
timeout=_QUERY_TIMEOUT, cwd=self._cwd,
)
if result["success"] and result.get("output"):
output = result["output"].strip()
if len(output) > _MIN_OUTPUT_LEN:
with self._prefetch_lock:
self._prefetch_result = output
except Exception as e:
logger.debug("ByteRover prefetch failed: %s", e)
self._prefetch_thread = threading.Thread(
target=_run, daemon=True, name="brv-prefetch"
)
self._prefetch_thread.start()
def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
"""Curate the conversation turn in background (non-blocking)."""
self._turn_count += 1
# Only curate substantive turns
if len(user_content.strip()) < _MIN_QUERY_LEN:
return
def _sync():
try:
combined = f"User: {user_content[:2000]}\nAssistant: {assistant_content[:2000]}"
_run_brv(
["curate", "--", combined],
timeout=_CURATE_TIMEOUT, cwd=self._cwd,
)
except Exception as e:
logger.debug("ByteRover sync failed: %s", e)
# Wait for previous sync
if self._sync_thread and self._sync_thread.is_alive():
self._sync_thread.join(timeout=5.0)
self._sync_thread = threading.Thread(
target=_sync, daemon=True, name="brv-sync"
)
self._sync_thread.start()
def on_memory_write(self, action: str, target: str, content: str) -> None:
"""Mirror built-in memory writes to ByteRover."""
if action not in ("add", "replace") or not content:
return
def _write():
try:
label = "User profile" if target == "user" else "Agent memory"
_run_brv(
["curate", "--", f"[{label}] {content}"],
timeout=_CURATE_TIMEOUT, cwd=self._cwd,
)
except Exception as e:
logger.debug("ByteRover memory mirror failed: %s", e)
t = threading.Thread(target=_write, daemon=True, name="brv-memwrite")
t.start()
def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
"""Extract insights before context compression discards turns."""
if not messages:
return ""
# Build a summary of messages about to be compressed
parts = []
for msg in messages[-10:]: # last 10 messages
role = msg.get("role", "")
content = msg.get("content", "")
if isinstance(content, str) and content.strip() and role in ("user", "assistant"):
parts.append(f"{role}: {content[:500]}")
if not parts:
return ""
combined = "\n".join(parts)
def _flush():
try:
_run_brv(
["curate", "--", f"[Pre-compression context]\n{combined}"],
timeout=_CURATE_TIMEOUT, cwd=self._cwd,
)
logger.info("ByteRover pre-compression flush: %d messages", len(parts))
except Exception as e:
logger.debug("ByteRover pre-compression flush failed: %s", e)
t = threading.Thread(target=_flush, daemon=True, name="brv-flush")
t.start()
return ""
def get_tool_schemas(self) -> List[Dict[str, Any]]:
return [QUERY_SCHEMA, CURATE_SCHEMA, STATUS_SCHEMA]
def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str:
if tool_name == "brv_query":
return self._tool_query(args)
elif tool_name == "brv_curate":
return self._tool_curate(args)
elif tool_name == "brv_status":
return self._tool_status()
return json.dumps({"error": f"Unknown tool: {tool_name}"})
def shutdown(self) -> None:
for t in (self._sync_thread, self._prefetch_thread):
if t and t.is_alive():
t.join(timeout=10.0)
# -- Tool implementations ------------------------------------------------
def _tool_query(self, args: dict) -> str:
query = args.get("query", "")
if not query:
return json.dumps({"error": "query is required"})
result = _run_brv(
["query", "--", query.strip()[:5000]],
timeout=_QUERY_TIMEOUT, cwd=self._cwd,
)
if not result["success"]:
return json.dumps({"error": result.get("error", "Query failed")})
output = result.get("output", "").strip()
if not output or len(output) < _MIN_OUTPUT_LEN:
return json.dumps({"result": "No relevant memories found."})
# Truncate very long results
if len(output) > 8000:
output = output[:8000] + "\n\n[... truncated]"
return json.dumps({"result": output})
def _tool_curate(self, args: dict) -> str:
content = args.get("content", "")
if not content:
return json.dumps({"error": "content is required"})
result = _run_brv(
["curate", "--", content],
timeout=_CURATE_TIMEOUT, cwd=self._cwd,
)
if not result["success"]:
return json.dumps({"error": result.get("error", "Curate failed")})
return json.dumps({"result": "Memory curated successfully."})
def _tool_status(self) -> str:
    """Report brv backend health as JSON."""
    outcome = _run_brv(["status"], timeout=15, cwd=self._cwd)
    if outcome["success"]:
        return json.dumps({"status": outcome.get("output", "")})
    return json.dumps({"error": outcome.get("error", "Status check failed")})
# ---------------------------------------------------------------------------
# Plugin entry point
# ---------------------------------------------------------------------------
def register(ctx) -> None:
    """Plugin entry point: attach ByteRover to the memory-provider registry."""
    provider = ByteRoverMemoryProvider()
    ctx.register_memory_provider(provider)

View File

@@ -1,9 +0,0 @@
name: byterover
version: 1.0.0
description: "ByteRover — persistent knowledge tree with tiered retrieval via the brv CLI."
external_dependencies:
- name: brv
install: "curl -fsSL https://byterover.dev/install.sh | sh"
check: "brv --version"
hooks:
- on_pre_compress

View File

@@ -1,38 +0,0 @@
# Hindsight Memory Provider
Long-term memory with knowledge graph, entity resolution, and multi-strategy retrieval. Supports cloud and local modes.
## Requirements
- Cloud: `pip install hindsight-client` + API key from [app.hindsight.vectorize.io](https://app.hindsight.vectorize.io)
- Local: `pip install hindsight` + LLM API key for embeddings
## Setup
```bash
hermes memory setup # select "hindsight"
```
Or manually:
```bash
hermes config set memory.provider hindsight
echo "HINDSIGHT_API_KEY=your-key" >> ~/.hermes/.env
```
## Config
Config file: `$HERMES_HOME/hindsight/config.json` (or `~/.hindsight/config.json` legacy)
| Key | Default | Description |
|-----|---------|-------------|
| `mode` | `cloud` | `cloud` or `local` |
| `bank_id` | `hermes` | Memory bank identifier |
| `budget` | `mid` | Recall thoroughness: `low`/`mid`/`high` |
## Tools
| Tool | Description |
|------|-------------|
| `hindsight_retain` | Store information with auto entity extraction |
| `hindsight_recall` | Multi-strategy search (semantic + entity graph) |
| `hindsight_reflect` | Cross-memory synthesis (LLM-powered) |

View File

@@ -1,358 +0,0 @@
"""Hindsight memory plugin — MemoryProvider interface.
Long-term memory with knowledge graph, entity resolution, and multi-strategy
retrieval. Supports cloud (API key) and local (embedded PostgreSQL) modes.
Original PR #1811 by benfrank241, adapted to MemoryProvider ABC.
Config via environment variables:
HINDSIGHT_API_KEY — API key for Hindsight Cloud
HINDSIGHT_BANK_ID — memory bank identifier (default: hermes)
HINDSIGHT_BUDGET — recall budget: low/mid/high (default: mid)
HINDSIGHT_API_URL — API endpoint
HINDSIGHT_MODE — cloud or local (default: cloud)
Or via $HERMES_HOME/hindsight/config.json (profile-scoped), falling back to
~/.hindsight/config.json (legacy, shared) for backward compatibility.
"""
from __future__ import annotations
import json
import logging
import os
import queue
import threading
from typing import Any, Dict, List
from agent.memory_provider import MemoryProvider
logger = logging.getLogger(__name__)
# Default Hindsight Cloud endpoint.
_DEFAULT_API_URL = "https://api.hindsight.vectorize.io"
# Accepted recall-budget values; anything else falls back to "mid" in initialize().
_VALID_BUDGETS = {"low", "mid", "high"}
# ---------------------------------------------------------------------------
# Thread helper (from original PR — avoids aiohttp event loop conflicts)
# ---------------------------------------------------------------------------
def _run_in_thread(fn, timeout: float = 30.0):
result_q: queue.Queue = queue.Queue(maxsize=1)
def _run():
import asyncio
asyncio.set_event_loop(None)
try:
result_q.put(("ok", fn()))
except Exception as exc:
result_q.put(("err", exc))
t = threading.Thread(target=_run, daemon=True, name="hindsight-call")
t.start()
kind, value = result_q.get(timeout=timeout)
if kind == "err":
raise value
return value
# ---------------------------------------------------------------------------
# Tool schemas
# ---------------------------------------------------------------------------
# JSON-schema tool definitions surfaced to the agent. These only describe the
# interface; argument validation happens in handle_tool_call.
RETAIN_SCHEMA = {
    "name": "hindsight_retain",
    "description": (
        "Store information to long-term memory. Hindsight automatically "
        "extracts structured facts, resolves entities, and indexes for retrieval."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "content": {"type": "string", "description": "The information to store."},
            "context": {"type": "string", "description": "Short label (e.g. 'user preference', 'project decision')."},
        },
        "required": ["content"],
    },
}
# Search tool: ranked retrieval (maps to client.recall in handle_tool_call).
RECALL_SCHEMA = {
    "name": "hindsight_recall",
    "description": (
        "Search long-term memory. Returns memories ranked by relevance using "
        "semantic search, keyword matching, entity graph traversal, and reranking."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "What to search for."},
        },
        "required": ["query"],
    },
}
# Synthesis tool: cross-memory answer generation (maps to client.reflect).
REFLECT_SCHEMA = {
    "name": "hindsight_reflect",
    "description": (
        "Synthesize a reasoned answer from long-term memories. Unlike recall, "
        "this reasons across all stored memories to produce a coherent response."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "The question to reflect on."},
        },
        "required": ["query"],
    },
}
# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
def _load_config() -> dict:
    """Load config from profile-scoped path, legacy path, or env vars.

    Resolution order:
      1. $HERMES_HOME/hindsight/config.json (profile-scoped)
      2. ~/.hindsight/config.json (legacy, shared)
      3. Environment variables
    """
    from pathlib import Path
    from hermes_constants import get_hermes_home
    candidates = (
        get_hermes_home() / "hindsight" / "config.json",  # profile-scoped (preferred)
        Path.home() / ".hindsight" / "config.json",       # legacy shared (back-compat)
    )
    for candidate in candidates:
        if candidate.exists():
            try:
                return json.loads(candidate.read_text(encoding="utf-8"))
            except Exception:
                # Unreadable/corrupt file: fall through to the next source.
                pass
    env = os.environ
    return {
        "mode": env.get("HINDSIGHT_MODE", "cloud"),
        "apiKey": env.get("HINDSIGHT_API_KEY", ""),
        "banks": {
            "hermes": {
                "bankId": env.get("HINDSIGHT_BANK_ID", "hermes"),
                "budget": env.get("HINDSIGHT_BUDGET", "mid"),
                "enabled": True,
            }
        },
    }
# ---------------------------------------------------------------------------
# MemoryProvider implementation
# ---------------------------------------------------------------------------
class HindsightMemoryProvider(MemoryProvider):
    """Hindsight long-term memory with knowledge graph and multi-strategy retrieval.

    Lifecycle: is_available() -> initialize(session_id) -> tool calls /
    prefetch / sync_turn -> shutdown(). A fresh client is created per call
    via _make_client() rather than cached, so each background thread gets
    its own client instance.
    """
    def __init__(self):
        # Real setup happens in initialize(); the constructor stays I/O-free.
        self._config = None          # parsed config dict
        self._api_key = None         # cloud API key (unused in local mode)
        self._bank_id = "hermes"     # memory bank identifier
        self._budget = "mid"         # recall thoroughness: low/mid/high
        self._mode = "cloud"         # "cloud" or "local"
        self._prefetch_result = ""   # text produced by queue_prefetch, consumed by prefetch
        self._prefetch_lock = threading.Lock()  # guards _prefetch_result
        self._prefetch_thread = None
        self._sync_thread = None
    @property
    def name(self) -> str:
        """Registry identifier for this provider."""
        return "hindsight"
    def is_available(self) -> bool:
        """Return True when credentials for the configured mode are present."""
        try:
            cfg = _load_config()
            mode = cfg.get("mode", "cloud")
            if mode == "local":
                # Local (embedded) mode only needs an LLM API key for embeddings.
                embed = cfg.get("embed", {})
                return bool(embed.get("llmApiKey") or os.environ.get("HINDSIGHT_LLM_API_KEY"))
            api_key = cfg.get("apiKey") or os.environ.get("HINDSIGHT_API_KEY", "")
            return bool(api_key)
        except Exception:
            # Config/IO failures mean "not available", never an exception.
            return False
    def save_config(self, values, hermes_home):
        """Write config to $HERMES_HOME/hindsight/config.json.

        Merges *values* into the existing file (existing keys not present in
        *values* are preserved).
        """
        import json
        from pathlib import Path
        config_dir = Path(hermes_home) / "hindsight"
        config_dir.mkdir(parents=True, exist_ok=True)
        config_path = config_dir / "config.json"
        existing = {}
        if config_path.exists():
            try:
                existing = json.loads(config_path.read_text())
            except Exception:
                # Corrupt file: fall back to writing only the new values.
                pass
        existing.update(values)
        config_path.write_text(json.dumps(existing, indent=2))
    def get_config_schema(self):
        """Describe configurable keys for the interactive setup flow."""
        return [
            {"key": "mode", "description": "Cloud API or local embedded mode", "default": "cloud", "choices": ["cloud", "local"]},
            {"key": "api_key", "description": "Hindsight Cloud API key", "secret": True, "env_var": "HINDSIGHT_API_KEY", "url": "https://app.hindsight.vectorize.io"},
            {"key": "bank_id", "description": "Memory bank identifier", "default": "hermes"},
            {"key": "budget", "description": "Recall thoroughness", "default": "mid", "choices": ["low", "mid", "high"]},
            {"key": "llm_provider", "description": "LLM provider for local mode", "default": "anthropic", "choices": ["anthropic", "openai", "groq", "ollama"]},
            {"key": "llm_api_key", "description": "LLM API key for local mode", "secret": True, "env_var": "HINDSIGHT_LLM_API_KEY"},
            {"key": "llm_model", "description": "LLM model for local mode", "default": "claude-haiku-4-5-20251001"},
        ]
    def _make_client(self):
        """Create a fresh Hindsight client (thread-safe).

        Imports are deferred so the plugin loads even when only one of the
        two client packages is installed.
        """
        if self._mode == "local":
            from hindsight import HindsightEmbedded
            embed = self._config.get("embed", {})
            return HindsightEmbedded(
                profile=embed.get("profile", "hermes"),
                llm_provider=embed.get("llmProvider", ""),
                llm_api_key=embed.get("llmApiKey", ""),
                llm_model=embed.get("llmModel", ""),
            )
        from hindsight_client import Hindsight
        return Hindsight(api_key=self._api_key, timeout=30.0)
    def initialize(self, session_id: str, **kwargs) -> None:
        """Load config and best-effort ensure the configured bank exists."""
        self._config = _load_config()
        self._mode = self._config.get("mode", "cloud")
        self._api_key = self._config.get("apiKey") or os.environ.get("HINDSIGHT_API_KEY", "")
        banks = self._config.get("banks", {}).get("hermes", {})
        self._bank_id = banks.get("bankId", "hermes")
        budget = banks.get("budget", "mid")
        # Unrecognized budget values fall back to "mid".
        self._budget = budget if budget in _VALID_BUDGETS else "mid"
        # Ensure bank exists
        try:
            client = _run_in_thread(self._make_client)
            _run_in_thread(lambda: client.create_bank(bank_id=self._bank_id, name=self._bank_id))
        except Exception:
            pass  # Already exists
    def system_prompt_block(self) -> str:
        """Static prompt block advertising the memory tools to the model."""
        return (
            f"# Hindsight Memory\n"
            f"Active. Bank: {self._bank_id}, budget: {self._budget}.\n"
            f"Use hindsight_recall to search, hindsight_reflect for synthesis, "
            f"hindsight_retain to store facts."
        )
    def prefetch(self, query: str, *, session_id: str = "") -> str:
        """Consume the result of the last queue_prefetch() call.

        Note: *query* is not used here — recall runs in queue_prefetch; this
        only drains (and clears) the buffered result.
        """
        if self._prefetch_thread and self._prefetch_thread.is_alive():
            # Give an in-flight recall a short grace period to land.
            self._prefetch_thread.join(timeout=3.0)
        with self._prefetch_lock:
            result = self._prefetch_result
            self._prefetch_result = ""
        if not result:
            return ""
        return f"## Hindsight Memory\n{result}"
    def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
        """Kick off a background recall whose result prefetch() will return."""
        def _run():
            try:
                # Already on a fresh thread, so no _run_in_thread wrapper here.
                client = self._make_client()
                resp = client.recall(bank_id=self._bank_id, query=query, budget=self._budget)
                if resp.results:
                    text = "\n".join(r.text for r in resp.results if r.text)
                    with self._prefetch_lock:
                        self._prefetch_result = text
            except Exception as e:
                logger.debug("Hindsight prefetch failed: %s", e)
        self._prefetch_thread = threading.Thread(target=_run, daemon=True, name="hindsight-prefetch")
        self._prefetch_thread.start()
    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
        """Retain conversation turn in background (non-blocking)."""
        combined = f"User: {user_content}\nAssistant: {assistant_content}"
        def _sync():
            try:
                _run_in_thread(
                    lambda: self._make_client().retain(
                        bank_id=self._bank_id, content=combined, context="conversation"
                    )
                )
            except Exception as e:
                logger.warning("Hindsight sync failed: %s", e)
        # Only one sync thread at a time; wait briefly for the previous one.
        if self._sync_thread and self._sync_thread.is_alive():
            self._sync_thread.join(timeout=5.0)
        self._sync_thread = threading.Thread(target=_sync, daemon=True, name="hindsight-sync")
        self._sync_thread.start()
    def get_tool_schemas(self) -> List[Dict[str, Any]]:
        """Expose retain/recall/reflect tool definitions to the agent."""
        return [RETAIN_SCHEMA, RECALL_SCHEMA, REFLECT_SCHEMA]
    def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str:
        """Dispatch a tool invocation; every path returns a JSON string."""
        if tool_name == "hindsight_retain":
            content = args.get("content", "")
            if not content:
                return json.dumps({"error": "Missing required parameter: content"})
            context = args.get("context")
            try:
                _run_in_thread(
                    lambda: self._make_client().retain(
                        bank_id=self._bank_id, content=content, context=context
                    )
                )
                return json.dumps({"result": "Memory stored successfully."})
            except Exception as e:
                return json.dumps({"error": f"Failed to store memory: {e}"})
        elif tool_name == "hindsight_recall":
            query = args.get("query", "")
            if not query:
                return json.dumps({"error": "Missing required parameter: query"})
            try:
                resp = _run_in_thread(
                    lambda: self._make_client().recall(
                        bank_id=self._bank_id, query=query, budget=self._budget
                    )
                )
                if not resp.results:
                    return json.dumps({"result": "No relevant memories found."})
                # Number results for readability: "1. ...", "2. ...".
                lines = [f"{i}. {r.text}" for i, r in enumerate(resp.results, 1)]
                return json.dumps({"result": "\n".join(lines)})
            except Exception as e:
                return json.dumps({"error": f"Failed to search memory: {e}"})
        elif tool_name == "hindsight_reflect":
            query = args.get("query", "")
            if not query:
                return json.dumps({"error": "Missing required parameter: query"})
            try:
                resp = _run_in_thread(
                    lambda: self._make_client().reflect(
                        bank_id=self._bank_id, query=query, budget=self._budget
                    )
                )
                return json.dumps({"result": resp.text or "No relevant memories found."})
            except Exception as e:
                return json.dumps({"error": f"Failed to reflect: {e}"})
        return json.dumps({"error": f"Unknown tool: {tool_name}"})
    def shutdown(self) -> None:
        """Give in-flight background threads a bounded chance to finish."""
        for t in (self._prefetch_thread, self._sync_thread):
            if t and t.is_alive():
                t.join(timeout=5.0)
def register(ctx) -> None:
    """Plugin entry point: attach Hindsight to the memory-provider registry."""
    provider = HindsightMemoryProvider()
    ctx.register_memory_provider(provider)

View File

@@ -1,9 +0,0 @@
name: hindsight
version: 1.0.0
description: "Hindsight — long-term memory with knowledge graph, entity resolution, and multi-strategy retrieval."
pip_dependencies:
- hindsight-client
requires_env:
- HINDSIGHT_API_KEY
hooks:
- on_session_end

View File

@@ -1,36 +0,0 @@
# Holographic Memory Provider
Local SQLite fact store with FTS5 search, trust scoring, entity resolution, and HRR-based compositional retrieval.
## Requirements
None — uses SQLite (always available). NumPy optional for HRR algebra.
## Setup
```bash
hermes memory setup # select "holographic"
```
Or manually:
```bash
hermes config set memory.provider holographic
```
## Config
Config in `config.yaml` under `plugins.hermes-memory-store`:
| Key | Default | Description |
|-----|---------|-------------|
| `db_path` | `$HERMES_HOME/memory_store.db` | SQLite database path |
| `auto_extract` | `false` | Auto-extract facts at session end |
| `default_trust` | `0.5` | Default trust score for new facts |
| `hrr_dim` | `1024` | HRR vector dimensions |
## Tools
| Tool | Description |
|------|-------------|
| `fact_store` | 9 actions: add, search, probe, related, reason, contradict, update, remove, list |
| `fact_feedback` | Rate facts as helpful/unhelpful (trains trust scores) |

View File

@@ -1,395 +0,0 @@
"""hermes-memory-store — holographic memory plugin using MemoryProvider interface.
Registers as a MemoryProvider plugin, giving the agent structured fact storage
with entity resolution, trust scoring, and HRR-based compositional retrieval.
Original plugin by dusterbloom (PR #2351), adapted to the MemoryProvider ABC.
Config in $HERMES_HOME/config.yaml (profile-scoped):
plugins:
hermes-memory-store:
db_path: $HERMES_HOME/memory_store.db
auto_extract: false
default_trust: 0.5
min_trust_threshold: 0.3
temporal_decay_half_life: 0
"""
from __future__ import annotations
import json
import logging
import re
from pathlib import Path
from typing import Any, Dict, List
from agent.memory_provider import MemoryProvider
from .store import MemoryStore
from .retrieval import FactRetriever
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Tool schemas (unchanged from original PR)
# ---------------------------------------------------------------------------
# JSON-schema tool definitions surfaced to the agent. These describe the
# interface only; argument validation happens in the _handle_* methods.
FACT_STORE_SCHEMA = {
    "name": "fact_store",
    "description": (
        "Deep structured memory with algebraic reasoning. "
        "Use alongside the memory tool — memory for always-on context, "
        "fact_store for deep recall and compositional queries.\n\n"
        "ACTIONS (simple → powerful):\n"
        "• add — Store a fact the user would expect you to remember.\n"
        "• search — Keyword lookup ('editor config', 'deploy process').\n"
        "• probe — Entity recall: ALL facts about a person/thing.\n"
        "• related — What connects to an entity? Structural adjacency.\n"
        "• reason — Compositional: facts connected to MULTIPLE entities simultaneously.\n"
        "• contradict — Memory hygiene: find facts making conflicting claims.\n"
        "• update/remove/list — CRUD operations.\n\n"
        "IMPORTANT: Before answering questions about the user, ALWAYS probe or reason first."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "action": {
                "type": "string",
                "enum": ["add", "search", "probe", "related", "reason", "contradict", "update", "remove", "list"],
            },
            "content": {"type": "string", "description": "Fact content (required for 'add')."},
            "query": {"type": "string", "description": "Search query (required for 'search')."},
            "entity": {"type": "string", "description": "Entity name for 'probe'/'related'."},
            "entities": {"type": "array", "items": {"type": "string"}, "description": "Entity names for 'reason'."},
            "fact_id": {"type": "integer", "description": "Fact ID for 'update'/'remove'."},
            "category": {"type": "string", "enum": ["user_pref", "project", "tool", "general"]},
            "tags": {"type": "string", "description": "Comma-separated tags."},
            "trust_delta": {"type": "number", "description": "Trust adjustment for 'update'."},
            "min_trust": {"type": "number", "description": "Minimum trust filter (default: 0.3)."},
            "limit": {"type": "integer", "description": "Max results (default: 10)."},
        },
        "required": ["action"],
    },
}
# Feedback tool: adjusts a fact's trust score (handled by _handle_fact_feedback).
FACT_FEEDBACK_SCHEMA = {
    "name": "fact_feedback",
    "description": (
        "Rate a fact after using it. Mark 'helpful' if accurate, 'unhelpful' if outdated. "
        "This trains the memory — good facts rise, bad facts sink."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "action": {"type": "string", "enum": ["helpful", "unhelpful"]},
            "fact_id": {"type": "integer", "description": "The fact ID to rate."},
        },
        "required": ["action", "fact_id"],
    },
}
# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
def _load_plugin_config() -> dict:
    """Read this plugin's section of $HERMES_HOME/config.yaml.

    Returns an empty dict when the file is missing, unparsable, or has no
    `plugins.hermes-memory-store` section.
    """
    from hermes_constants import get_hermes_home
    path = get_hermes_home() / "config.yaml"
    if not path.exists():
        return {}
    try:
        import yaml
        with open(path) as fh:
            full = yaml.safe_load(fh) or {}
        section = full.get("plugins", {}).get("hermes-memory-store", {})
        return section or {}
    except Exception:
        return {}
# ---------------------------------------------------------------------------
# MemoryProvider implementation
# ---------------------------------------------------------------------------
class HolographicMemoryProvider(MemoryProvider):
    """Holographic memory with structured facts, entity resolution, and HRR retrieval.

    Backed by a local SQLite MemoryStore plus a FactRetriever; all heavy
    lifting is delegated to those two objects, created in initialize().
    """
    def __init__(self, config: dict | None = None):
        self._config = config or _load_plugin_config()
        self._store = None      # MemoryStore; created in initialize()
        self._retriever = None  # FactRetriever; created in initialize()
        # Facts below this trust score are filtered out of prefetch/search.
        self._min_trust = float(self._config.get("min_trust_threshold", 0.3))
    @property
    def name(self) -> str:
        """Registry identifier for this provider."""
        return "holographic"
    def is_available(self) -> bool:
        return True  # SQLite is always available, numpy is optional
    def save_config(self, values, hermes_home):
        """Write config to config.yaml under plugins.hermes-memory-store."""
        from pathlib import Path
        config_path = Path(hermes_home) / "config.yaml"
        try:
            import yaml
            existing = {}
            if config_path.exists():
                with open(config_path) as f:
                    existing = yaml.safe_load(f) or {}
            # Only this plugin's section is replaced; other keys are kept.
            existing.setdefault("plugins", {})
            existing["plugins"]["hermes-memory-store"] = values
            with open(config_path, "w") as f:
                yaml.dump(existing, f, default_flow_style=False)
        except Exception:
            # Best-effort: failure to persist config is silently ignored.
            pass
    def get_config_schema(self):
        """Describe configurable keys for the interactive setup flow."""
        from hermes_constants import display_hermes_home
        _default_db = f"{display_hermes_home()}/memory_store.db"
        return [
            {"key": "db_path", "description": "SQLite database path", "default": _default_db},
            {"key": "auto_extract", "description": "Auto-extract facts at session end", "default": "false", "choices": ["true", "false"]},
            {"key": "default_trust", "description": "Default trust score for new facts", "default": "0.5"},
            {"key": "hrr_dim", "description": "HRR vector dimensions", "default": "1024"},
        ]
    def initialize(self, session_id: str, **kwargs) -> None:
        """Build the store and retriever from config (types coerced from strings)."""
        from hermes_constants import get_hermes_home
        _default_db = str(get_hermes_home() / "memory_store.db")
        db_path = self._config.get("db_path", _default_db)
        default_trust = float(self._config.get("default_trust", 0.5))
        hrr_dim = int(self._config.get("hrr_dim", 1024))
        hrr_weight = float(self._config.get("hrr_weight", 0.3))
        temporal_decay = int(self._config.get("temporal_decay_half_life", 0))
        self._store = MemoryStore(db_path=db_path, default_trust=default_trust, hrr_dim=hrr_dim)
        self._retriever = FactRetriever(
            store=self._store,
            temporal_decay_half_life=temporal_decay,
            hrr_weight=hrr_weight,
            hrr_dim=hrr_dim,
        )
        self._session_id = session_id
    def system_prompt_block(self) -> str:
        """Advertise the fact tools, but only once at least one fact exists."""
        if not self._store:
            return ""
        try:
            # NOTE(review): reaches into the store's private connection to
            # count facts — confirm MemoryStore offers no public count API.
            total = self._store._conn.execute(
                "SELECT COUNT(*) FROM facts"
            ).fetchone()[0]
        except Exception:
            total = 0
        if total == 0:
            return ""
        return (
            f"# Holographic Memory\n"
            f"Active. {total} facts stored with entity resolution and trust scoring.\n"
            f"Use fact_store to search, probe entities, reason across entities, or add facts.\n"
            f"Use fact_feedback to rate facts after using them (trains trust scores)."
        )
    def prefetch(self, query: str, *, session_id: str = "") -> str:
        """Return up to 5 trusted facts relevant to *query*, or "" on any failure."""
        if not self._retriever or not query:
            return ""
        try:
            results = self._retriever.search(query, min_trust=self._min_trust, limit=5)
            if not results:
                return ""
            lines = []
            for r in results:
                trust = r.get("trust", 0)
                # Prefix each fact with its trust score, e.g. "[0.7] ...".
                lines.append(f"- [{trust:.1f}] {r.get('content', '')}")
            return "## Holographic Memory\n" + "\n".join(lines)
        except Exception as e:
            logger.debug("Holographic prefetch failed: %s", e)
            return ""
    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
        # Holographic memory stores explicit facts via tools, not auto-sync.
        # The on_session_end hook handles auto-extraction if configured.
        pass
    def get_tool_schemas(self) -> List[Dict[str, Any]]:
        """Expose the fact_store and fact_feedback tools to the agent."""
        return [FACT_STORE_SCHEMA, FACT_FEEDBACK_SCHEMA]
    def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> str:
        """Dispatch a tool invocation; every path returns a JSON string."""
        if tool_name == "fact_store":
            return self._handle_fact_store(args)
        elif tool_name == "fact_feedback":
            return self._handle_fact_feedback(args)
        return json.dumps({"error": f"Unknown tool: {tool_name}"})
    def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
        """Optionally mine the transcript for facts (gated by auto_extract config)."""
        if not self._config.get("auto_extract", False):
            return
        if not self._store or not messages:
            return
        self._auto_extract_facts(messages)
    def on_memory_write(self, action: str, target: str, content: str) -> None:
        """Mirror built-in memory writes as facts."""
        if action == "add" and self._store and content:
            try:
                category = "user_pref" if target == "user" else "general"
                self._store.add_fact(content, category=category)
            except Exception as e:
                logger.debug("Holographic memory_write mirror failed: %s", e)
    def shutdown(self) -> None:
        # Drop references; the store/retriever own any cleanup of their own.
        self._store = None
        self._retriever = None
    # -- Tool handlers -------------------------------------------------------
    def _handle_fact_store(self, args: dict) -> str:
        """Execute one fact_store action; returns JSON results or a JSON error."""
        try:
            action = args["action"]
            store = self._store
            retriever = self._retriever
            if action == "add":
                fact_id = store.add_fact(
                    args["content"],
                    category=args.get("category", "general"),
                    tags=args.get("tags", ""),
                )
                return json.dumps({"fact_id": fact_id, "status": "added"})
            elif action == "search":
                results = retriever.search(
                    args["query"],
                    category=args.get("category"),
                    min_trust=float(args.get("min_trust", self._min_trust)),
                    limit=int(args.get("limit", 10)),
                )
                return json.dumps({"results": results, "count": len(results)})
            elif action == "probe":
                results = retriever.probe(
                    args["entity"],
                    category=args.get("category"),
                    limit=int(args.get("limit", 10)),
                )
                return json.dumps({"results": results, "count": len(results)})
            elif action == "related":
                results = retriever.related(
                    args["entity"],
                    category=args.get("category"),
                    limit=int(args.get("limit", 10)),
                )
                return json.dumps({"results": results, "count": len(results)})
            elif action == "reason":
                entities = args.get("entities", [])
                if not entities:
                    return json.dumps({"error": "reason requires 'entities' list"})
                results = retriever.reason(
                    entities,
                    category=args.get("category"),
                    limit=int(args.get("limit", 10)),
                )
                return json.dumps({"results": results, "count": len(results)})
            elif action == "contradict":
                results = retriever.contradict(
                    category=args.get("category"),
                    limit=int(args.get("limit", 10)),
                )
                return json.dumps({"results": results, "count": len(results)})
            elif action == "update":
                updated = store.update_fact(
                    int(args["fact_id"]),
                    content=args.get("content"),
                    # Only pass trust_delta when the caller supplied one.
                    trust_delta=float(args["trust_delta"]) if "trust_delta" in args else None,
                    tags=args.get("tags"),
                    category=args.get("category"),
                )
                return json.dumps({"updated": updated})
            elif action == "remove":
                removed = store.remove_fact(int(args["fact_id"]))
                return json.dumps({"removed": removed})
            elif action == "list":
                facts = store.list_facts(
                    category=args.get("category"),
                    min_trust=float(args.get("min_trust", 0.0)),
                    limit=int(args.get("limit", 10)),
                )
                return json.dumps({"facts": facts, "count": len(facts)})
            else:
                return json.dumps({"error": f"Unknown action: {action}"})
        except KeyError as exc:
            # A required args[...] lookup failed for this action.
            return json.dumps({"error": f"Missing required argument: {exc}"})
        except Exception as exc:
            return json.dumps({"error": str(exc)})
    def _handle_fact_feedback(self, args: dict) -> str:
        """Record a helpful/unhelpful rating against a fact's trust score."""
        try:
            fact_id = int(args["fact_id"])
            helpful = args["action"] == "helpful"
            result = self._store.record_feedback(fact_id, helpful=helpful)
            return json.dumps(result)
        except KeyError as exc:
            return json.dumps({"error": f"Missing required argument: {exc}"})
        except Exception as exc:
            return json.dumps({"error": str(exc)})
    # -- Auto-extraction (on_session_end) ------------------------------------
    def _auto_extract_facts(self, messages: list) -> None:
        """Heuristically pull preference/decision statements out of user messages.

        Each matching message is stored at most once per pattern group
        (preferences as 'user_pref', decisions as 'project'), truncated to
        400 chars. NOTE(review): patterns are recompiled on every call.
        """
        _PREF_PATTERNS = [
            re.compile(r'\bI\s+(?:prefer|like|love|use|want|need)\s+(.+)', re.IGNORECASE),
            re.compile(r'\bmy\s+(?:favorite|preferred|default)\s+\w+\s+is\s+(.+)', re.IGNORECASE),
            re.compile(r'\bI\s+(?:always|never|usually)\s+(.+)', re.IGNORECASE),
        ]
        _DECISION_PATTERNS = [
            re.compile(r'\bwe\s+(?:decided|agreed|chose)\s+(?:to\s+)?(.+)', re.IGNORECASE),
            re.compile(r'\bthe\s+project\s+(?:uses|needs|requires)\s+(.+)', re.IGNORECASE),
        ]
        extracted = 0
        for msg in messages:
            if msg.get("role") != "user":
                continue
            content = msg.get("content", "")
            # Skip non-string or trivially short messages.
            if not isinstance(content, str) or len(content) < 10:
                continue
            for pattern in _PREF_PATTERNS:
                if pattern.search(content):
                    try:
                        self._store.add_fact(content[:400], category="user_pref")
                        extracted += 1
                    except Exception:
                        pass
                    break
            for pattern in _DECISION_PATTERNS:
                if pattern.search(content):
                    try:
                        self._store.add_fact(content[:400], category="project")
                        extracted += 1
                    except Exception:
                        pass
                    break
        if extracted:
            logger.info("Auto-extracted %d facts from conversation", extracted)
# ---------------------------------------------------------------------------
# Plugin entry point
# ---------------------------------------------------------------------------
def register(ctx) -> None:
    """Register the holographic memory provider with the plugin system."""
    ctx.register_memory_provider(
        HolographicMemoryProvider(config=_load_plugin_config())
    )

View File

@@ -1,203 +0,0 @@
"""Holographic Reduced Representations (HRR) with phase encoding.
HRRs are a vector symbolic architecture for encoding compositional structure
into fixed-width distributed representations. This module uses *phase vectors*:
each concept is a vector of angles in [0, 2π). The algebraic operations are:
bind — circular convolution (phase addition) — associates two concepts
unbind — circular correlation (phase subtraction) — retrieves a bound value
bundle — superposition (circular mean) — merges multiple concepts
Phase encoding is numerically stable, avoids the magnitude collapse of
traditional complex-number HRRs, and maps cleanly to cosine similarity.
Atoms are generated deterministically from SHA-256 so representations are
identical across processes, machines, and language versions.
References:
Plate (1995) — Holographic Reduced Representations
Gayler (2004) — Vector Symbolic Architectures answer Jackendoff's challenges
"""
import hashlib
import logging
import struct
import math
try:
import numpy as np
_HAS_NUMPY = True
except ImportError:
_HAS_NUMPY = False
logger = logging.getLogger(__name__)
# Phases are angles in [0, 2*pi); precompute the modulus used throughout.
_TWO_PI = 2.0 * math.pi
def _require_numpy() -> None:
    """Raise RuntimeError when numpy could not be imported."""
    if _HAS_NUMPY:
        return
    raise RuntimeError("numpy is required for holographic operations")
def encode_atom(word: str, dim: int = 1024) -> "np.ndarray":
    """Deterministic phase vector for *word* via SHA-256 counter blocks.

    Hashes f"{word}:{i}" for successive i, unpacks each 32-byte digest as
    sixteen little-endian uint16 values, scales them onto [0, 2*pi), and
    truncates to *dim* float64 phases. Built on hashlib rather than any RNG
    so the representation is identical across processes and platforms.
    """
    _require_numpy()
    per_block = 16  # one SHA-256 digest = 32 bytes = 16 uint16 values
    raw: list[int] = []
    for counter in range(math.ceil(dim / per_block)):
        block = hashlib.sha256(f"{word}:{counter}".encode()).digest()
        raw.extend(struct.unpack("<16H", block))
    return np.array(raw[:dim], dtype=np.float64) * (_TWO_PI / 65536.0)
def bind(a: "np.ndarray", b: "np.ndarray") -> "np.ndarray":
    """Associate two concepts: circular convolution as element-wise phase addition.

    The composite is quasi-orthogonal (dissimilar) to both inputs.
    """
    _require_numpy()
    combined = a + b
    return combined % _TWO_PI
def unbind(memory: "np.ndarray", key: "np.ndarray") -> "np.ndarray":
    """Retrieve a bound value: circular correlation as element-wise phase subtraction.

    unbind(bind(a, b), a) ≈ b, up to superposition noise.
    """
    _require_numpy()
    difference = memory - key
    return difference % _TWO_PI
def bundle(*vectors: "np.ndarray") -> "np.ndarray":
    """Merge vectors by superposition (circular mean of complex exponentials).

    The bundle stays similar to each input; capacity is roughly
    O(sqrt(dim)) items before similarity degrades.
    """
    _require_numpy()
    resultant = sum(np.exp(1j * phases) for phases in vectors)
    return np.angle(resultant) % _TWO_PI
def similarity(a: "np.ndarray", b: "np.ndarray") -> float:
    """Mean phase-cosine similarity in [-1, 1].

    1.0 for identical vectors, near 0.0 for unrelated (random) vectors,
    -1.0 for perfectly anti-correlated ones.
    """
    _require_numpy()
    return float(np.cos(a - b).mean())
def encode_text(text: str, dim: int = 1024) -> "np.ndarray":
    """Bag-of-words encoding: bundle of atom vectors, one per token.

    Tokenization: lowercase, split on whitespace, strip leading/trailing
    punctuation from each token. Empty input (or all-punctuation input)
    encodes as the reserved atom "__hrr_empty__".
    """
    _require_numpy()
    punctuation = ".,!?;:\"'()[]{}"
    words = [piece.strip(punctuation) for piece in text.lower().split()]
    words = [word for word in words if word]
    if not words:
        return encode_atom("__hrr_empty__", dim)
    return bundle(*[encode_atom(word, dim) for word in words])
def encode_fact(content: str, entities: list[str], dim: int = 1024) -> "np.ndarray":
"""Structured encoding: content bound to ROLE_CONTENT, each entity bound to ROLE_ENTITY, all bundled.
Role vectors are reserved atoms: "__hrr_role_content__", "__hrr_role_entity__"
Components:
1. bind(encode_text(content, dim), encode_atom("__hrr_role_content__", dim))
2. For each entity: bind(encode_atom(entity.lower(), dim), encode_atom("__hrr_role_entity__", dim))
3. bundle all components together
This enables algebraic extraction:
unbind(fact, bind(entity, ROLE_ENTITY)) ≈ content_vector
"""
_require_numpy()
role_content = encode_atom("__hrr_role_content__", dim)
role_entity = encode_atom("__hrr_role_entity__", dim)
components: list[np.ndarray] = [
bind(encode_text(content, dim), role_content)
]
for entity in entities:
components.append(bind(encode_atom(entity.lower(), dim), role_entity))
return bundle(*components)
def phases_to_bytes(phases: "np.ndarray") -> bytes:
    """Serialize a phase vector to its raw float64 bytes (8 KB at dim=1024)."""
    _require_numpy()
    raw = phases.tobytes()
    return raw
def bytes_to_phases(data: bytes) -> "np.ndarray":
    """Deserialize raw float64 bytes back into a phase vector.

    Inverse of phases_to_bytes.  np.frombuffer yields a read-only view
    backed by the bytes object, so the array is copied to hand callers a
    mutable, independently-owned array.
    """
    _require_numpy()
    view = np.frombuffer(data, dtype=np.float64)
    return view.copy()
def snr_estimate(dim: int, n_items: int) -> float:
    """Estimate the signal-to-noise ratio of a holographic store.

    SNR = sqrt(dim / n_items) for positive n_items, otherwise infinity.
    Once n_items exceeds dim / 4 the ratio drops below 2.0 and retrieval
    errors become likely, so a warning is logged at that threshold.
    """
    _require_numpy()
    if n_items <= 0:
        return float("inf")
    ratio = math.sqrt(dim / n_items)
    if ratio < 2.0:
        logger.warning(
            "HRR storage near capacity: SNR=%.2f (dim=%d, n_items=%d). "
            "Retrieval accuracy may degrade. Consider increasing dim or reducing stored items.",
            ratio,
            dim,
            n_items,
        )
    return ratio

View File

@@ -1,5 +0,0 @@
name: holographic
version: 0.1.0
description: "Holographic memory — local SQLite fact store with FTS5 search, trust scoring, and HRR-based compositional retrieval."
hooks:
- on_session_end

View File

@@ -1,593 +0,0 @@
"""Hybrid keyword/BM25 retrieval for the memory store.
Ported from KIK memory_agent.py — combines FTS5 full-text search with
Jaccard similarity reranking and trust-weighted scoring.
"""
from __future__ import annotations
import math
from datetime import datetime, timezone
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from .store import MemoryStore
try:
from . import holographic as hrr
except ImportError:
import holographic as hrr # type: ignore[no-redef]
class FactRetriever:
    """Multi-strategy fact retrieval with trust-weighted scoring.

    Combines up to three relevance signals — FTS5 rank, Jaccard token
    overlap, and HRR phase-vector similarity — then multiplies by each
    fact's trust score and (optionally) an exponential temporal decay.
    When numpy is unavailable the HRR weight is folded back into the
    keyword signals and every HRR-based method falls back to `search`.
    """
    def __init__(
        self,
        store: MemoryStore,
        temporal_decay_half_life: int = 0,  # days, 0 = disabled
        fts_weight: float = 0.4,
        jaccard_weight: float = 0.3,
        hrr_weight: float = 0.3,
        hrr_dim: int = 1024,
    ):
        self.store = store
        self.half_life = temporal_decay_half_life
        self.hrr_dim = hrr_dim
        # Auto-redistribute weights if numpy unavailable
        if hrr_weight > 0 and not hrr._HAS_NUMPY:
            fts_weight = 0.6
            jaccard_weight = 0.4
            hrr_weight = 0.0
        self.fts_weight = fts_weight
        self.jaccard_weight = jaccard_weight
        self.hrr_weight = hrr_weight
    def search(
        self,
        query: str,
        category: str | None = None,
        min_trust: float = 0.3,
        limit: int = 10,
    ) -> list[dict]:
        """Hybrid search: FTS5 candidates → Jaccard rerank → trust weighting.
        Pipeline:
        1. FTS5 search: Get limit*3 candidates from SQLite full-text search
        2. Jaccard boost: Token overlap between query and fact content
        3. Trust weighting: final_score = relevance * trust_score
        4. Temporal decay (optional): decay = 0.5^(age_days / half_life)
        Returns list of dicts with fact data + 'score' field, sorted by score desc.
        """
        # Stage 1: Get FTS5 candidates (more than limit for reranking headroom)
        candidates = self._fts_candidates(query, category, min_trust, limit * 3)
        if not candidates:
            return []
        # Stage 2: Rerank with Jaccard + trust + optional decay
        query_tokens = self._tokenize(query)
        # The query's HRR encoding is loop-invariant — encode it once here
        # instead of once per candidate.  hrr_weight > 0 implies numpy is
        # available (see the weight redistribution in __init__).
        query_vec = None
        if self.hrr_weight > 0:
            query_vec = hrr.encode_text(query, self.hrr_dim)
        scored = []
        for fact in candidates:
            content_tokens = self._tokenize(fact["content"])
            tag_tokens = self._tokenize(fact.get("tags", ""))
            all_tokens = content_tokens | tag_tokens
            jaccard = self._jaccard_similarity(query_tokens, all_tokens)
            fts_score = fact.get("fts_rank", 0.0)
            # HRR similarity
            if query_vec is not None and fact.get("hrr_vector"):
                fact_vec = hrr.bytes_to_phases(fact["hrr_vector"])
                hrr_sim = (hrr.similarity(query_vec, fact_vec) + 1.0) / 2.0  # shift to [0,1]
            else:
                hrr_sim = 0.5  # neutral: neither boost nor penalty
            # Combine FTS5 + Jaccard + HRR
            relevance = (self.fts_weight * fts_score
                         + self.jaccard_weight * jaccard
                         + self.hrr_weight * hrr_sim)
            # Trust weighting
            score = relevance * fact["trust_score"]
            # Optional temporal decay
            if self.half_life > 0:
                score *= self._temporal_decay(fact.get("updated_at") or fact.get("created_at"))
            fact["score"] = score
            scored.append(fact)
        # Sort by score descending, return top limit
        scored.sort(key=lambda x: x["score"], reverse=True)
        results = scored[:limit]
        # Strip raw HRR bytes — callers expect JSON-serializable dicts
        for fact in results:
            fact.pop("hrr_vector", None)
        return results
    def probe(
        self,
        entity: str,
        category: str | None = None,
        limit: int = 10,
    ) -> list[dict]:
        """Compositional entity query using HRR algebra.
        Unbinds entity from memory bank to extract associated content.
        This is NOT keyword search — it uses algebraic structure to find facts
        where the entity plays a structural role.
        Falls back to FTS5 search if numpy unavailable.
        """
        if not hrr._HAS_NUMPY:
            # Fallback to keyword search on entity name
            return self.search(entity, category=category, limit=limit)
        conn = self.store._conn
        # Encode entity as role-bound vector
        role_entity = hrr.encode_atom("__hrr_role_entity__", self.hrr_dim)
        entity_vec = hrr.encode_atom(entity.lower(), self.hrr_dim)
        probe_key = hrr.bind(entity_vec, role_entity)
        # Try category-specific bank first, then all facts
        if category:
            bank_name = f"cat:{category}"
            bank_row = conn.execute(
                "SELECT vector FROM memory_banks WHERE bank_name = ?",
                (bank_name,),
            ).fetchone()
            if bank_row:
                bank_vec = hrr.bytes_to_phases(bank_row["vector"])
                extracted = hrr.unbind(bank_vec, probe_key)
                # Use extracted signal to score individual facts
                return self._score_facts_by_vector(
                    extracted, category=category, limit=limit
                )
        # Score against individual fact vectors directly
        where = "WHERE hrr_vector IS NOT NULL"
        params: list = []
        if category:
            where += " AND category = ?"
            params.append(category)
        rows = conn.execute(
            f"""
            SELECT fact_id, content, category, tags, trust_score,
                   retrieval_count, helpful_count, created_at, updated_at,
                   hrr_vector
            FROM facts
            {where}
            """,
            params,
        ).fetchall()
        if not rows:
            # Final fallback: keyword search
            return self.search(entity, category=category, limit=limit)
        # The content-role atom is constant — encode it once, not per row.
        role_content = hrr.encode_atom("__hrr_role_content__", self.hrr_dim)
        scored = []
        for row in rows:
            fact = dict(row)
            fact_vec = hrr.bytes_to_phases(fact.pop("hrr_vector"))
            # Unbind probe key from fact to see if entity is structurally present
            residual = hrr.unbind(fact_vec, probe_key)
            # Compare residual against content signal
            content_vec = hrr.bind(hrr.encode_text(fact["content"], self.hrr_dim), role_content)
            sim = hrr.similarity(residual, content_vec)
            fact["score"] = (sim + 1.0) / 2.0 * fact["trust_score"]
            scored.append(fact)
        scored.sort(key=lambda x: x["score"], reverse=True)
        return scored[:limit]
    def related(
        self,
        entity: str,
        category: str | None = None,
        limit: int = 10,
    ) -> list[dict]:
        """Discover facts that share structural connections with an entity.
        Unlike probe (which finds facts *about* an entity), related finds
        facts that are connected through shared context — e.g., other entities
        mentioned alongside this one, or content that overlaps structurally.
        Falls back to FTS5 search if numpy unavailable.
        """
        if not hrr._HAS_NUMPY:
            return self.search(entity, category=category, limit=limit)
        conn = self.store._conn
        # Encode entity as a bare atom (not role-bound — we want ANY structural match)
        entity_vec = hrr.encode_atom(entity.lower(), self.hrr_dim)
        # Get all facts with vectors
        where = "WHERE hrr_vector IS NOT NULL"
        params: list = []
        if category:
            where += " AND category = ?"
            params.append(category)
        rows = conn.execute(
            f"""
            SELECT fact_id, content, category, tags, trust_score,
                   retrieval_count, helpful_count, created_at, updated_at,
                   hrr_vector
            FROM facts
            {where}
            """,
            params,
        ).fetchall()
        if not rows:
            return self.search(entity, category=category, limit=limit)
        # Role atoms are constant across the scan — encode them once instead
        # of re-deriving both atoms for every row.
        role_entity = hrr.encode_atom("__hrr_role_entity__", self.hrr_dim)
        role_content = hrr.encode_atom("__hrr_role_content__", self.hrr_dim)
        # Score each fact by how much the entity's atom appears in its vector
        # This catches both role-bound entity matches AND content word matches
        scored = []
        for row in rows:
            fact = dict(row)
            fact_vec = hrr.bytes_to_phases(fact.pop("hrr_vector"))
            # Check structural similarity: unbind entity from fact
            residual = hrr.unbind(fact_vec, entity_vec)
            # A high-similarity residual to ANY known role vector means this entity
            # plays a structural role in the fact
            entity_role_sim = hrr.similarity(residual, role_entity)
            content_role_sim = hrr.similarity(residual, role_content)
            # Take the max — entity could appear in either role
            best_sim = max(entity_role_sim, content_role_sim)
            fact["score"] = (best_sim + 1.0) / 2.0 * fact["trust_score"]
            scored.append(fact)
        scored.sort(key=lambda x: x["score"], reverse=True)
        return scored[:limit]
    def reason(
        self,
        entities: list[str],
        category: str | None = None,
        limit: int = 10,
    ) -> list[dict]:
        """Multi-entity compositional query — vector-space JOIN.
        Given multiple entities, algebraically intersects their structural
        connections to find facts related to ALL of them simultaneously.
        This is compositional reasoning that no embedding DB can do.
        Example: reason(["peppi", "backend"]) finds facts where peppi AND
        backend both play structural roles — without keyword matching.
        Falls back to FTS5 search if numpy unavailable.
        """
        if not hrr._HAS_NUMPY or not entities:
            # Fallback: search with all entities as keywords
            query = " ".join(entities)
            return self.search(query, category=category, limit=limit)
        conn = self.store._conn
        role_entity = hrr.encode_atom("__hrr_role_entity__", self.hrr_dim)
        # For each entity, compute what the bank "remembers" about it
        # by unbinding entity+role from each fact vector
        entity_residuals = []
        for entity in entities:
            entity_vec = hrr.encode_atom(entity.lower(), self.hrr_dim)
            probe_key = hrr.bind(entity_vec, role_entity)
            entity_residuals.append(probe_key)
        # Get all facts with vectors
        where = "WHERE hrr_vector IS NOT NULL"
        params: list = []
        if category:
            where += " AND category = ?"
            params.append(category)
        rows = conn.execute(
            f"""
            SELECT fact_id, content, category, tags, trust_score,
                   retrieval_count, helpful_count, created_at, updated_at,
                   hrr_vector
            FROM facts
            {where}
            """,
            params,
        ).fetchall()
        if not rows:
            query = " ".join(entities)
            return self.search(query, category=category, limit=limit)
        # Score each fact by how much EACH entity is structurally present.
        # A fact scores high only if ALL entities have structural presence
        # (AND semantics via min, vs OR which would use mean/max).
        role_content = hrr.encode_atom("__hrr_role_content__", self.hrr_dim)
        scored = []
        for row in rows:
            fact = dict(row)
            fact_vec = hrr.bytes_to_phases(fact.pop("hrr_vector"))
            entity_scores = []
            for probe_key in entity_residuals:
                residual = hrr.unbind(fact_vec, probe_key)
                sim = hrr.similarity(residual, role_content)
                entity_scores.append(sim)
            min_sim = min(entity_scores)
            fact["score"] = (min_sim + 1.0) / 2.0 * fact["trust_score"]
            scored.append(fact)
        scored.sort(key=lambda x: x["score"], reverse=True)
        return scored[:limit]
    def contradict(
        self,
        category: str | None = None,
        threshold: float = 0.3,
        limit: int = 10,
    ) -> list[dict]:
        """Find potentially contradictory facts via entity overlap + content divergence.
        Two facts contradict when they share entities (same subject) but have
        low content-vector similarity (different claims). This is automated
        memory hygiene — no other memory system does this.
        Returns pairs of facts with a contradiction score.
        Falls back to empty list if numpy unavailable.
        """
        if not hrr._HAS_NUMPY:
            return []
        conn = self.store._conn
        # Get all facts with vectors and their linked entities
        where = "WHERE f.hrr_vector IS NOT NULL"
        params: list = []
        if category:
            where += " AND f.category = ?"
            params.append(category)
        rows = conn.execute(
            f"""
            SELECT f.fact_id, f.content, f.category, f.tags, f.trust_score,
                   f.created_at, f.updated_at, f.hrr_vector
            FROM facts f
            {where}
            """,
            params,
        ).fetchall()
        if len(rows) < 2:
            return []
        # Guard against O(n²) explosion on large fact stores.
        # At 500 facts, that's ~125K comparisons — acceptable.
        # Above that, only check the most recently updated facts.
        _MAX_CONTRADICT_FACTS = 500
        if len(rows) > _MAX_CONTRADICT_FACTS:
            rows = sorted(rows, key=lambda r: r["updated_at"] or r["created_at"], reverse=True)
            rows = rows[:_MAX_CONTRADICT_FACTS]
        # Build entity sets per fact
        fact_entities: dict[int, set[str]] = {}
        for row in rows:
            fid = row["fact_id"]
            entity_rows = conn.execute(
                """
                SELECT e.name FROM entities e
                JOIN fact_entities fe ON fe.entity_id = e.entity_id
                WHERE fe.fact_id = ?
                """,
                (fid,),
            ).fetchall()
            fact_entities[fid] = {r["name"].lower() for r in entity_rows}
        # Compare all pairs: high entity overlap + low content similarity = contradiction
        facts = [dict(r) for r in rows]
        contradictions = []
        for i in range(len(facts)):
            for j in range(i + 1, len(facts)):
                f1, f2 = facts[i], facts[j]
                ents1 = fact_entities.get(f1["fact_id"], set())
                ents2 = fact_entities.get(f2["fact_id"], set())
                if not ents1 or not ents2:
                    continue
                # Entity overlap (Jaccard)
                entity_overlap = len(ents1 & ents2) / len(ents1 | ents2) if (ents1 | ents2) else 0.0
                if entity_overlap < 0.3:
                    continue  # Not enough entity overlap to be contradictory
                # Content similarity via HRR vectors
                v1 = hrr.bytes_to_phases(f1["hrr_vector"])
                v2 = hrr.bytes_to_phases(f2["hrr_vector"])
                content_sim = hrr.similarity(v1, v2)
                # High entity overlap + low content similarity = potential contradiction
                # contradiction_score: higher = more contradictory
                contradiction_score = entity_overlap * (1.0 - (content_sim + 1.0) / 2.0)
                if contradiction_score >= threshold:
                    # Strip hrr_vector from output (not JSON serializable)
                    f1_clean = {k: v for k, v in f1.items() if k != "hrr_vector"}
                    f2_clean = {k: v for k, v in f2.items() if k != "hrr_vector"}
                    contradictions.append({
                        "fact_a": f1_clean,
                        "fact_b": f2_clean,
                        "entity_overlap": round(entity_overlap, 3),
                        "content_similarity": round(content_sim, 3),
                        "contradiction_score": round(contradiction_score, 3),
                        "shared_entities": sorted(ents1 & ents2),
                    })
        contradictions.sort(key=lambda x: x["contradiction_score"], reverse=True)
        return contradictions[:limit]
    def _score_facts_by_vector(
        self,
        target_vec: "np.ndarray",
        category: str | None = None,
        limit: int = 10,
    ) -> list[dict]:
        """Score facts by similarity to a target vector.

        Used by probe() after unbinding a probe key from a category bank.
        Returns fact dicts (hrr_vector stripped) sorted by trust-weighted
        similarity, highest first.
        """
        conn = self.store._conn
        where = "WHERE hrr_vector IS NOT NULL"
        params: list = []
        if category:
            where += " AND category = ?"
            params.append(category)
        rows = conn.execute(
            f"""
            SELECT fact_id, content, category, tags, trust_score,
                   retrieval_count, helpful_count, created_at, updated_at,
                   hrr_vector
            FROM facts
            {where}
            """,
            params,
        ).fetchall()
        scored = []
        for row in rows:
            fact = dict(row)
            fact_vec = hrr.bytes_to_phases(fact.pop("hrr_vector"))
            sim = hrr.similarity(target_vec, fact_vec)
            fact["score"] = (sim + 1.0) / 2.0 * fact["trust_score"]
            scored.append(fact)
        scored.sort(key=lambda x: x["score"], reverse=True)
        return scored[:limit]
    def _fts_candidates(
        self,
        query: str,
        category: str | None,
        min_trust: float,
        limit: int,
    ) -> list[dict]:
        """Get raw FTS5 candidates from the store.
        Uses the store's database connection directly for FTS5 MATCH
        with rank scoring. Normalizes FTS5 rank to [0, 1] range.
        """
        conn = self.store._conn
        # Build query - FTS5 rank is negative (lower = better match)
        # We need to join facts_fts with facts to get all columns
        params: list = []
        where_clauses = ["facts_fts MATCH ?"]
        params.append(query)
        if category:
            where_clauses.append("f.category = ?")
            params.append(category)
        where_clauses.append("f.trust_score >= ?")
        params.append(min_trust)
        where_sql = " AND ".join(where_clauses)
        sql = f"""
            SELECT f.*, facts_fts.rank as fts_rank_raw
            FROM facts_fts
            JOIN facts f ON f.fact_id = facts_fts.rowid
            WHERE {where_sql}
            ORDER BY facts_fts.rank
            LIMIT ?
        """
        params.append(limit)
        try:
            rows = conn.execute(sql, params).fetchall()
        except Exception:
            # FTS5 MATCH can fail on malformed queries — fall back to empty
            return []
        if not rows:
            return []
        # Normalize FTS5 rank: rank is negative, lower = better
        # Convert to positive score in [0, 1] range
        raw_ranks = [abs(row["fts_rank_raw"]) for row in rows]
        max_rank = max(raw_ranks) if raw_ranks else 1.0
        max_rank = max(max_rank, 1e-6)  # avoid div by zero
        results = []
        for row, raw_rank in zip(rows, raw_ranks):
            fact = dict(row)
            fact.pop("fts_rank_raw", None)
            fact["fts_rank"] = raw_rank / max_rank  # normalize to [0, 1]
            results.append(fact)
        return results
    @staticmethod
    def _tokenize(text: str) -> set[str]:
        """Simple whitespace tokenization with lowercasing.
        Strips common punctuation. No stemming/lemmatization (Phase 1).
        """
        if not text:
            return set()
        # Split on whitespace, lowercase, strip punctuation
        tokens = set()
        for word in text.lower().split():
            cleaned = word.strip(".,;:!?\"'()[]{}#@<>")
            if cleaned:
                tokens.add(cleaned)
        return tokens
    @staticmethod
    def _jaccard_similarity(set_a: set, set_b: set) -> float:
        """Jaccard similarity coefficient: |A ∩ B| / |A ∪ B|."""
        if not set_a or not set_b:
            return 0.0
        intersection = len(set_a & set_b)
        union = len(set_a | set_b)
        return intersection / union if union > 0 else 0.0
    def _temporal_decay(self, timestamp_str: str | None) -> float:
        """Exponential decay: 0.5^(age_days / half_life_days).
        Returns 1.0 if decay is disabled or timestamp is missing.
        Unparseable timestamps also yield 1.0 (no decay) rather than raising.
        """
        if not self.half_life or not timestamp_str:
            return 1.0
        try:
            if isinstance(timestamp_str, str):
                # Parse ISO format timestamp from SQLite
                ts = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
            else:
                ts = timestamp_str
            if ts.tzinfo is None:
                ts = ts.replace(tzinfo=timezone.utc)
            age_days = (datetime.now(timezone.utc) - ts).total_seconds() / 86400
            if age_days < 0:
                # Future timestamps (clock skew) get no decay
                return 1.0
            return math.pow(0.5, age_days / self.half_life)
        except (ValueError, TypeError):
            return 1.0

View File

@@ -1,575 +0,0 @@
"""
SQLite-backed fact store with entity resolution and trust scoring.
Single-user Hermes memory store plugin.
"""
import re
import sqlite3
import threading
from datetime import datetime
from pathlib import Path
try:
from . import holographic as hrr
except ImportError:
import holographic as hrr # type: ignore[no-redef]
_SCHEMA = """
CREATE TABLE IF NOT EXISTS facts (
fact_id INTEGER PRIMARY KEY AUTOINCREMENT,
content TEXT NOT NULL UNIQUE,
category TEXT DEFAULT 'general',
tags TEXT DEFAULT '',
trust_score REAL DEFAULT 0.5,
retrieval_count INTEGER DEFAULT 0,
helpful_count INTEGER DEFAULT 0,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
hrr_vector BLOB
);
CREATE TABLE IF NOT EXISTS entities (
entity_id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
entity_type TEXT DEFAULT 'unknown',
aliases TEXT DEFAULT '',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS fact_entities (
fact_id INTEGER REFERENCES facts(fact_id),
entity_id INTEGER REFERENCES entities(entity_id),
PRIMARY KEY (fact_id, entity_id)
);
CREATE INDEX IF NOT EXISTS idx_facts_trust ON facts(trust_score DESC);
CREATE INDEX IF NOT EXISTS idx_facts_category ON facts(category);
CREATE INDEX IF NOT EXISTS idx_entities_name ON entities(name);
CREATE VIRTUAL TABLE IF NOT EXISTS facts_fts
USING fts5(content, tags, content=facts, content_rowid=fact_id);
CREATE TRIGGER IF NOT EXISTS facts_ai AFTER INSERT ON facts BEGIN
INSERT INTO facts_fts(rowid, content, tags)
VALUES (new.fact_id, new.content, new.tags);
END;
CREATE TRIGGER IF NOT EXISTS facts_ad AFTER DELETE ON facts BEGIN
INSERT INTO facts_fts(facts_fts, rowid, content, tags)
VALUES ('delete', old.fact_id, old.content, old.tags);
END;
CREATE TRIGGER IF NOT EXISTS facts_au AFTER UPDATE ON facts BEGIN
INSERT INTO facts_fts(facts_fts, rowid, content, tags)
VALUES ('delete', old.fact_id, old.content, old.tags);
INSERT INTO facts_fts(rowid, content, tags)
VALUES (new.fact_id, new.content, new.tags);
END;
CREATE TABLE IF NOT EXISTS memory_banks (
bank_id INTEGER PRIMARY KEY AUTOINCREMENT,
bank_name TEXT NOT NULL UNIQUE,
vector BLOB NOT NULL,
dim INTEGER NOT NULL,
fact_count INTEGER DEFAULT 0,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
"""
# Trust adjustment constants
_HELPFUL_DELTA = 0.05
_UNHELPFUL_DELTA = -0.10
_TRUST_MIN = 0.0
_TRUST_MAX = 1.0
# Entity extraction patterns
_RE_CAPITALIZED = re.compile(r'\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b')
_RE_DOUBLE_QUOTE = re.compile(r'"([^"]+)"')
_RE_SINGLE_QUOTE = re.compile(r"'([^']+)'")
_RE_AKA = re.compile(
r'(\w+(?:\s+\w+)*)\s+(?:aka|also known as)\s+(\w+(?:\s+\w+)*)',
re.IGNORECASE,
)
def _clamp_trust(value: float) -> float:
    """Clamp a trust score into [_TRUST_MIN, _TRUST_MAX]."""
    upper_bounded = min(_TRUST_MAX, value)
    return max(_TRUST_MIN, upper_bounded)
class MemoryStore:
"""SQLite-backed fact store with entity resolution and trust scoring."""
def __init__(
self,
db_path: "str | Path | None" = None,
default_trust: float = 0.5,
hrr_dim: int = 1024,
) -> None:
if db_path is None:
from hermes_constants import get_hermes_home
db_path = str(get_hermes_home() / "memory_store.db")
self.db_path = Path(db_path).expanduser()
self.db_path.parent.mkdir(parents=True, exist_ok=True)
self.default_trust = _clamp_trust(default_trust)
self.hrr_dim = hrr_dim
self._hrr_available = hrr._HAS_NUMPY
self._conn: sqlite3.Connection = sqlite3.connect(
str(self.db_path),
check_same_thread=False,
timeout=10.0,
)
self._lock = threading.RLock()
self._conn.row_factory = sqlite3.Row
self._init_db()
# ------------------------------------------------------------------
# Initialisation
# ------------------------------------------------------------------
def _init_db(self) -> None:
"""Create tables, indexes, and triggers if they do not exist. Enable WAL mode."""
self._conn.execute("PRAGMA journal_mode=WAL")
self._conn.executescript(_SCHEMA)
# Migrate: add hrr_vector column if missing (safe for existing databases)
columns = {row[1] for row in self._conn.execute("PRAGMA table_info(facts)").fetchall()}
if "hrr_vector" not in columns:
self._conn.execute("ALTER TABLE facts ADD COLUMN hrr_vector BLOB")
self._conn.commit()
# ------------------------------------------------------------------
# Public API
# ------------------------------------------------------------------
def add_fact(
self,
content: str,
category: str = "general",
tags: str = "",
) -> int:
"""Insert a fact and return its fact_id.
Deduplicates by content (UNIQUE constraint). On duplicate, returns
the existing fact_id without modifying the row. Extracts entities from
the content and links them to the fact.
"""
with self._lock:
content = content.strip()
if not content:
raise ValueError("content must not be empty")
try:
cur = self._conn.execute(
"""
INSERT INTO facts (content, category, tags, trust_score)
VALUES (?, ?, ?, ?)
""",
(content, category, tags, self.default_trust),
)
self._conn.commit()
fact_id: int = cur.lastrowid # type: ignore[assignment]
except sqlite3.IntegrityError:
# Duplicate content — return existing id
row = self._conn.execute(
"SELECT fact_id FROM facts WHERE content = ?", (content,)
).fetchone()
return int(row["fact_id"])
# Entity extraction and linking
for name in self._extract_entities(content):
entity_id = self._resolve_entity(name)
self._link_fact_entity(fact_id, entity_id)
# Compute HRR vector after entity linking
self._compute_hrr_vector(fact_id, content)
self._rebuild_bank(category)
return fact_id
def search_facts(
self,
query: str,
category: str | None = None,
min_trust: float = 0.3,
limit: int = 10,
) -> list[dict]:
"""Full-text search over facts using FTS5.
Returns a list of fact dicts ordered by FTS5 rank, then trust_score
descending. Also increments retrieval_count for matched facts.
"""
with self._lock:
query = query.strip()
if not query:
return []
params: list = [query, min_trust]
category_clause = ""
if category is not None:
category_clause = "AND f.category = ?"
params.append(category)
params.append(limit)
sql = f"""
SELECT f.fact_id, f.content, f.category, f.tags,
f.trust_score, f.retrieval_count, f.helpful_count,
f.created_at, f.updated_at
FROM facts f
JOIN facts_fts fts ON fts.rowid = f.fact_id
WHERE facts_fts MATCH ?
AND f.trust_score >= ?
{category_clause}
ORDER BY fts.rank, f.trust_score DESC
LIMIT ?
"""
rows = self._conn.execute(sql, params).fetchall()
results = [self._row_to_dict(r) for r in rows]
if results:
ids = [r["fact_id"] for r in results]
placeholders = ",".join("?" * len(ids))
self._conn.execute(
f"UPDATE facts SET retrieval_count = retrieval_count + 1 WHERE fact_id IN ({placeholders})",
ids,
)
self._conn.commit()
return results
def update_fact(
self,
fact_id: int,
content: str | None = None,
trust_delta: float | None = None,
tags: str | None = None,
category: str | None = None,
) -> bool:
"""Partially update a fact. Trust is clamped to [0, 1].
Returns True if the row existed, False otherwise.
"""
with self._lock:
row = self._conn.execute(
"SELECT fact_id, trust_score FROM facts WHERE fact_id = ?", (fact_id,)
).fetchone()
if row is None:
return False
assignments: list[str] = ["updated_at = CURRENT_TIMESTAMP"]
params: list = []
if content is not None:
assignments.append("content = ?")
params.append(content.strip())
if tags is not None:
assignments.append("tags = ?")
params.append(tags)
if category is not None:
assignments.append("category = ?")
params.append(category)
if trust_delta is not None:
new_trust = _clamp_trust(row["trust_score"] + trust_delta)
assignments.append("trust_score = ?")
params.append(new_trust)
params.append(fact_id)
self._conn.execute(
f"UPDATE facts SET {', '.join(assignments)} WHERE fact_id = ?",
params,
)
self._conn.commit()
# If content changed, re-extract entities
if content is not None:
self._conn.execute(
"DELETE FROM fact_entities WHERE fact_id = ?", (fact_id,)
)
for name in self._extract_entities(content):
entity_id = self._resolve_entity(name)
self._link_fact_entity(fact_id, entity_id)
self._conn.commit()
# Recompute HRR vector if content changed
if content is not None:
self._compute_hrr_vector(fact_id, content)
# Rebuild bank for relevant category
cat = category or self._conn.execute(
"SELECT category FROM facts WHERE fact_id = ?", (fact_id,)
).fetchone()["category"]
self._rebuild_bank(cat)
return True
def remove_fact(self, fact_id: int) -> bool:
"""Delete a fact and its entity links. Returns True if the row existed."""
with self._lock:
row = self._conn.execute(
"SELECT fact_id, category FROM facts WHERE fact_id = ?", (fact_id,)
).fetchone()
if row is None:
return False
self._conn.execute(
"DELETE FROM fact_entities WHERE fact_id = ?", (fact_id,)
)
self._conn.execute("DELETE FROM facts WHERE fact_id = ?", (fact_id,))
self._conn.commit()
self._rebuild_bank(row["category"])
return True
def list_facts(
self,
category: str | None = None,
min_trust: float = 0.0,
limit: int = 50,
) -> list[dict]:
"""Browse facts ordered by trust_score descending.
Optionally filter by category and minimum trust score.
"""
with self._lock:
params: list = [min_trust]
category_clause = ""
if category is not None:
category_clause = "AND category = ?"
params.append(category)
params.append(limit)
sql = f"""
SELECT fact_id, content, category, tags, trust_score,
retrieval_count, helpful_count, created_at, updated_at
FROM facts
WHERE trust_score >= ?
{category_clause}
ORDER BY trust_score DESC
LIMIT ?
"""
rows = self._conn.execute(sql, params).fetchall()
return [self._row_to_dict(r) for r in rows]
def record_feedback(self, fact_id: int, helpful: bool) -> dict:
"""Record user feedback and adjust trust asymmetrically.
helpful=True -> trust += 0.05, helpful_count += 1
helpful=False -> trust -= 0.10
Returns a dict with fact_id, old_trust, new_trust, helpful_count.
Raises KeyError if fact_id does not exist.
"""
with self._lock:
row = self._conn.execute(
"SELECT fact_id, trust_score, helpful_count FROM facts WHERE fact_id = ?",
(fact_id,),
).fetchone()
if row is None:
raise KeyError(f"fact_id {fact_id} not found")
old_trust: float = row["trust_score"]
delta = _HELPFUL_DELTA if helpful else _UNHELPFUL_DELTA
new_trust = _clamp_trust(old_trust + delta)
helpful_increment = 1 if helpful else 0
self._conn.execute(
"""
UPDATE facts
SET trust_score = ?,
helpful_count = helpful_count + ?,
updated_at = CURRENT_TIMESTAMP
WHERE fact_id = ?
""",
(new_trust, helpful_increment, fact_id),
)
self._conn.commit()
return {
"fact_id": fact_id,
"old_trust": old_trust,
"new_trust": new_trust,
"helpful_count": row["helpful_count"] + helpful_increment,
}
# ------------------------------------------------------------------
# Entity helpers
# ------------------------------------------------------------------
def _extract_entities(self, text: str) -> list[str]:
"""Extract entity candidates from text using simple regex rules.
Rules applied (in order):
1. Capitalized multi-word phrases e.g. "John Doe"
2. Double-quoted terms e.g. "Python"
3. Single-quoted terms e.g. 'pytest'
4. AKA patterns e.g. "Guido aka BDFL" -> two entities
Returns a deduplicated list preserving first-seen order.
"""
seen: set[str] = set()
candidates: list[str] = []
def _add(name: str) -> None:
stripped = name.strip()
if stripped and stripped.lower() not in seen:
seen.add(stripped.lower())
candidates.append(stripped)
for m in _RE_CAPITALIZED.finditer(text):
_add(m.group(1))
for m in _RE_DOUBLE_QUOTE.finditer(text):
_add(m.group(1))
for m in _RE_SINGLE_QUOTE.finditer(text):
_add(m.group(1))
for m in _RE_AKA.finditer(text):
_add(m.group(1))
_add(m.group(2))
return candidates
def _resolve_entity(self, name: str) -> int:
"""Find an existing entity by name or alias (case-insensitive) or create one.
Returns the entity_id.
"""
# Exact name match
row = self._conn.execute(
"SELECT entity_id FROM entities WHERE name LIKE ?", (name,)
).fetchone()
if row is not None:
return int(row["entity_id"])
# Search aliases — aliases stored as comma-separated; use LIKE with % boundaries
alias_row = self._conn.execute(
"""
SELECT entity_id FROM entities
WHERE ',' || aliases || ',' LIKE '%,' || ? || ',%'
""",
(name,),
).fetchone()
if alias_row is not None:
return int(alias_row["entity_id"])
# Create new entity
cur = self._conn.execute(
"INSERT INTO entities (name) VALUES (?)", (name,)
)
self._conn.commit()
return int(cur.lastrowid) # type: ignore[return-value]
    def _link_fact_entity(self, fact_id: int, entity_id: int) -> None:
        """Insert into fact_entities, silently ignore if the link already exists.

        INSERT OR IGNORE makes the call idempotent against the
        (fact_id, entity_id) primary key, so callers may safely re-link
        entities during content updates.
        """
        self._conn.execute(
            """
            INSERT OR IGNORE INTO fact_entities (fact_id, entity_id)
            VALUES (?, ?)
            """,
            (fact_id, entity_id),
        )
        self._conn.commit()
def _compute_hrr_vector(self, fact_id: int, content: str) -> None:
"""Compute and store HRR vector for a fact. No-op if numpy unavailable."""
with self._lock:
if not self._hrr_available:
return
# Get entities linked to this fact
rows = self._conn.execute(
"""
SELECT e.name FROM entities e
JOIN fact_entities fe ON fe.entity_id = e.entity_id
WHERE fe.fact_id = ?
""",
(fact_id,),
).fetchall()
entities = [row["name"] for row in rows]
vector = hrr.encode_fact(content, entities, self.hrr_dim)
self._conn.execute(
"UPDATE facts SET hrr_vector = ? WHERE fact_id = ?",
(hrr.phases_to_bytes(vector), fact_id),
)
self._conn.commit()
def _rebuild_bank(self, category: str) -> None:
"""Full rebuild of a category's memory bank from all its fact vectors."""
with self._lock:
if not self._hrr_available:
return
bank_name = f"cat:{category}"
rows = self._conn.execute(
"SELECT hrr_vector FROM facts WHERE category = ? AND hrr_vector IS NOT NULL",
(category,),
).fetchall()
if not rows:
self._conn.execute("DELETE FROM memory_banks WHERE bank_name = ?", (bank_name,))
self._conn.commit()
return
vectors = [hrr.bytes_to_phases(row["hrr_vector"]) for row in rows]
bank_vector = hrr.bundle(*vectors)
fact_count = len(vectors)
# Check SNR
hrr.snr_estimate(self.hrr_dim, fact_count)
self._conn.execute(
"""
INSERT INTO memory_banks (bank_name, vector, dim, fact_count, updated_at)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP)
ON CONFLICT(bank_name) DO UPDATE SET
vector = excluded.vector,
dim = excluded.dim,
fact_count = excluded.fact_count,
updated_at = excluded.updated_at
""",
(bank_name, hrr.phases_to_bytes(bank_vector), self.hrr_dim, fact_count),
)
self._conn.commit()
def rebuild_all_vectors(self, dim: int | None = None) -> int:
"""Recompute all HRR vectors + banks from text. For recovery/migration.
Returns the number of facts processed.
"""
with self._lock:
if not self._hrr_available:
return 0
if dim is not None:
self.hrr_dim = dim
rows = self._conn.execute(
"SELECT fact_id, content, category FROM facts"
).fetchall()
categories: set[str] = set()
for row in rows:
self._compute_hrr_vector(row["fact_id"], row["content"])
categories.add(row["category"])
for category in categories:
self._rebuild_bank(category)
return len(rows)
# ------------------------------------------------------------------
# Utilities
# ------------------------------------------------------------------
def _row_to_dict(self, row: sqlite3.Row) -> dict:
"""Convert a sqlite3.Row to a plain dict."""
return dict(row)
def close(self) -> None:
    """Release the underlying SQLite connection."""
    self._conn.close()
def __enter__(self) -> "MemoryStore":
return self
def __exit__(self, *_: object) -> None:
self.close()

View File

@@ -1,35 +0,0 @@
# Honcho Memory Provider
AI-native cross-session user modeling with dialectic Q&A, semantic search, peer cards, and persistent conclusions.
## Requirements
- `pip install honcho-ai`
- Honcho API key from [app.honcho.dev](https://app.honcho.dev)
## Setup
```bash
hermes memory setup # select "honcho"
```
Or manually:
```bash
hermes config set memory.provider honcho
echo "HONCHO_API_KEY=your-key" >> ~/.hermes/.env
```
## Config
Config file: `$HERMES_HOME/honcho.json` (or `~/.honcho/config.json` legacy)
Existing Honcho users: your config and data are preserved. Just set `memory.provider: honcho`.
## Tools
| Tool | Description |
|------|-------------|
| `honcho_profile` | User's peer card — key facts, no LLM |
| `honcho_search` | Semantic search over stored context |
| `honcho_context` | LLM-synthesized answer from memory |
| `honcho_conclude` | Write a fact about the user to memory |

View File

@@ -1,355 +0,0 @@
"""Honcho memory plugin — MemoryProvider for Honcho AI-native memory.
Provides cross-session user modeling with dialectic Q&A, semantic search,
peer cards, and persistent conclusions via the Honcho SDK.
The 4 tools (profile, search, context, conclude) are exposed through
the MemoryProvider interface.
Config: Uses the existing Honcho config chain:
1. $HERMES_HOME/honcho.json (profile-scoped)
2. ~/.honcho/config.json (legacy global)
3. Environment variables
"""
from __future__ import annotations
import json
import logging
import threading
from typing import Any, Dict, List, Optional
from agent.memory_provider import MemoryProvider
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Tool schemas (moved from tools/honcho_tools.py)
# ---------------------------------------------------------------------------
PROFILE_SCHEMA = {
"name": "honcho_profile",
"description": (
"Retrieve the user's peer card from Honcho — a curated list of key facts "
"about them (name, role, preferences, communication style, patterns). "
"Fast, no LLM reasoning, minimal cost. "
"Use this at conversation start or when you need a quick factual snapshot."
),
"parameters": {"type": "object", "properties": {}, "required": []},
}
SEARCH_SCHEMA = {
"name": "honcho_search",
"description": (
"Semantic search over Honcho's stored context about the user. "
"Returns raw excerpts ranked by relevance — no LLM synthesis. "
"Cheaper and faster than honcho_context. "
"Good when you want to find specific past facts and reason over them yourself."
),
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "What to search for in Honcho's memory.",
},
"max_tokens": {
"type": "integer",
"description": "Token budget for returned context (default 800, max 2000).",
},
},
"required": ["query"],
},
}
CONTEXT_SCHEMA = {
"name": "honcho_context",
"description": (
"Ask Honcho a natural language question and get a synthesized answer. "
"Uses Honcho's LLM (dialectic reasoning) — higher cost than honcho_profile or honcho_search. "
"Can query about any peer: the user (default) or the AI assistant."
),
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "A natural language question.",
},
"peer": {
"type": "string",
"description": "Which peer to query about: 'user' (default) or 'ai'.",
},
},
"required": ["query"],
},
}
CONCLUDE_SCHEMA = {
"name": "honcho_conclude",
"description": (
"Write a conclusion about the user back to Honcho's memory. "
"Conclusions are persistent facts that build the user's profile. "
"Use when the user states a preference, corrects you, or shares "
"something to remember across sessions."
),
"parameters": {
"type": "object",
"properties": {
"conclusion": {
"type": "string",
"description": "A factual statement about the user to persist.",
}
},
"required": ["conclusion"],
},
}
# ---------------------------------------------------------------------------
# MemoryProvider implementation
# ---------------------------------------------------------------------------
class HonchoMemoryProvider(MemoryProvider):
    """Honcho AI-native memory with dialectic Q&A and persistent user modeling."""

    def __init__(self):
        # All state is populated lazily by initialize(); an unset _manager
        # means the plugin is inactive and every hook becomes a no-op.
        self._manager = None  # HonchoSessionManager
        self._config = None  # HonchoClientConfig
        self._session_key = ""
        # Latest background dialectic answer; handed over under _prefetch_lock.
        self._prefetch_result = ""
        self._prefetch_lock = threading.Lock()
        self._prefetch_thread: Optional[threading.Thread] = None
        self._sync_thread: Optional[threading.Thread] = None

    @property
    def name(self) -> str:
        """Provider identifier (value used for ``memory.provider``)."""
        return "honcho"

    def is_available(self) -> bool:
        """Check if Honcho is configured. No network calls."""
        try:
            from plugins.memory.honcho.client import HonchoClientConfig
            cfg = HonchoClientConfig.from_global_config()
            # Either an API key (hosted) or a base URL (self-hosted) suffices.
            return cfg.enabled and bool(cfg.api_key or cfg.base_url)
        except Exception:
            return False

    def save_config(self, values, hermes_home):
        """Write config to $HERMES_HOME/honcho.json (Honcho SDK native format)."""
        import json
        from pathlib import Path
        config_path = Path(hermes_home) / "honcho.json"
        existing = {}
        if config_path.exists():
            try:
                existing = json.loads(config_path.read_text())
            except Exception:
                # Unreadable/corrupt file: overwrite with the new values only.
                pass
        existing.update(values)
        config_path.write_text(json.dumps(existing, indent=2))

    def get_config_schema(self):
        """Describe the config keys the setup wizard should prompt for."""
        return [
            {"key": "api_key", "description": "Honcho API key", "secret": True, "env_var": "HONCHO_API_KEY", "url": "https://app.honcho.dev"},
            {"key": "base_url", "description": "Honcho base URL", "default": "https://api.honcho.dev"},
        ]

    def initialize(self, session_id: str, **kwargs) -> None:
        """Initialize Honcho session manager."""
        try:
            from plugins.memory.honcho.client import HonchoClientConfig, get_honcho_client
            from plugins.memory.honcho.session import HonchoSessionManager
            cfg = HonchoClientConfig.from_global_config()
            if not cfg.enabled or not (cfg.api_key or cfg.base_url):
                logger.debug("Honcho not configured — plugin inactive")
                return
            self._config = cfg
            client = get_honcho_client(cfg)
            self._manager = HonchoSessionManager(
                honcho=client,
                config=cfg,
                context_tokens=cfg.context_tokens,
            )
            # Build session key from kwargs or session_id
            platform = kwargs.get("platform", "cli")
            user_id = kwargs.get("user_id", "")
            if user_id:
                self._session_key = f"{platform}:{user_id}"
            else:
                self._session_key = session_id
        except ImportError:
            logger.debug("honcho-ai package not installed — plugin inactive")
        except Exception as e:
            logger.warning("Honcho init failed: %s", e)
            self._manager = None

    def system_prompt_block(self) -> str:
        """Return the prompt section advertising the Honcho tools, or ""."""
        if not self._manager or not self._session_key:
            return ""
        return (
            "# Honcho Memory\n"
            "Active. AI-native cross-session user modeling.\n"
            "Use honcho_profile for a quick factual snapshot, "
            "honcho_search for raw excerpts, honcho_context for synthesized answers, "
            "honcho_conclude to save facts about the user."
        )

    def prefetch(self, query: str, *, session_id: str = "") -> str:
        """Return prefetched dialectic context from background thread."""
        # Give an in-flight prefetch a short window to finish before reading.
        if self._prefetch_thread and self._prefetch_thread.is_alive():
            self._prefetch_thread.join(timeout=3.0)
        with self._prefetch_lock:
            result = self._prefetch_result
            self._prefetch_result = ""
        if not result:
            return ""
        return f"## Honcho Context\n{result}"

    def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
        """Fire a background dialectic query for the upcoming turn."""
        if not self._manager or not self._session_key or not query:
            return
        def _run():
            try:
                result = self._manager.dialectic_query(
                    self._session_key, query, peer="user"
                )
                if result and result.strip():
                    with self._prefetch_lock:
                        self._prefetch_result = result
            except Exception as e:
                logger.debug("Honcho prefetch failed: %s", e)
        self._prefetch_thread = threading.Thread(
            target=_run, daemon=True, name="honcho-prefetch"
        )
        self._prefetch_thread.start()

    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
        """Record the conversation turn in Honcho (non-blocking)."""
        if not self._manager or not self._session_key:
            return
        def _sync():
            try:
                session = self._manager.get_or_create_session(self._session_key)
                # Truncate to keep payloads bounded per message.
                session.add_message("user", user_content[:4000])
                session.add_message("assistant", assistant_content[:4000])
                # Flush to Honcho API
                # NOTE(review): _flush_session is a private manager method —
                # this couples to HonchoSessionManager internals.
                self._manager._flush_session(session)
            except Exception as e:
                logger.debug("Honcho sync_turn failed: %s", e)
        # Serialize syncs: wait briefly for the previous one before starting.
        if self._sync_thread and self._sync_thread.is_alive():
            self._sync_thread.join(timeout=5.0)
        self._sync_thread = threading.Thread(
            target=_sync, daemon=True, name="honcho-sync"
        )
        self._sync_thread.start()

    def on_memory_write(self, action: str, target: str, content: str) -> None:
        """Mirror built-in user profile writes as Honcho conclusions."""
        # Only "add" operations on the user profile are mirrored.
        if action != "add" or target != "user" or not content:
            return
        if not self._manager or not self._session_key:
            return
        def _write():
            try:
                self._manager.create_conclusion(self._session_key, content)
            except Exception as e:
                logger.debug("Honcho memory mirror failed: %s", e)
        t = threading.Thread(target=_write, daemon=True, name="honcho-memwrite")
        t.start()

    def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
        """Flush all pending messages to Honcho on session end."""
        if not self._manager:
            return
        # Wait for pending sync
        if self._sync_thread and self._sync_thread.is_alive():
            self._sync_thread.join(timeout=10.0)
        try:
            self._manager.flush_all()
        except Exception as e:
            logger.debug("Honcho session-end flush failed: %s", e)

    def get_tool_schemas(self) -> List[Dict[str, Any]]:
        """Expose the four Honcho tools to the agent."""
        return [PROFILE_SCHEMA, SEARCH_SCHEMA, CONTEXT_SCHEMA, CONCLUDE_SCHEMA]

    def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str:
        """Dispatch a Honcho tool invocation; always returns a JSON string."""
        if not self._manager or not self._session_key:
            return json.dumps({"error": "Honcho is not active for this session."})
        try:
            if tool_name == "honcho_profile":
                card = self._manager.get_peer_card(self._session_key)
                if not card:
                    return json.dumps({"result": "No profile facts available yet."})
                return json.dumps({"result": card})
            elif tool_name == "honcho_search":
                query = args.get("query", "")
                if not query:
                    return json.dumps({"error": "Missing required parameter: query"})
                # Clamp the token budget to the documented maximum of 2000.
                max_tokens = min(int(args.get("max_tokens", 800)), 2000)
                result = self._manager.search_context(
                    self._session_key, query, max_tokens=max_tokens
                )
                if not result:
                    return json.dumps({"result": "No relevant context found."})
                return json.dumps({"result": result})
            elif tool_name == "honcho_context":
                query = args.get("query", "")
                if not query:
                    return json.dumps({"error": "Missing required parameter: query"})
                peer = args.get("peer", "user")
                result = self._manager.dialectic_query(
                    self._session_key, query, peer=peer
                )
                return json.dumps({"result": result or "No result from Honcho."})
            elif tool_name == "honcho_conclude":
                conclusion = args.get("conclusion", "")
                if not conclusion:
                    return json.dumps({"error": "Missing required parameter: conclusion"})
                ok = self._manager.create_conclusion(self._session_key, conclusion)
                if ok:
                    return json.dumps({"result": f"Conclusion saved: {conclusion}"})
                return json.dumps({"error": "Failed to save conclusion."})
            return json.dumps({"error": f"Unknown tool: {tool_name}"})
        except Exception as e:
            logger.error("Honcho tool %s failed: %s", tool_name, e)
            return json.dumps({"error": f"Honcho {tool_name} failed: {e}"})

    def shutdown(self) -> None:
        """Join worker threads and flush any buffered messages."""
        for t in (self._prefetch_thread, self._sync_thread):
            if t and t.is_alive():
                t.join(timeout=5.0)
        # Flush any remaining messages
        if self._manager:
            try:
                self._manager.flush_all()
            except Exception:
                pass
# ---------------------------------------------------------------------------
# Plugin entry point
# ---------------------------------------------------------------------------
def register(ctx) -> None:
    """Register Honcho as a memory provider plugin."""
    provider = HonchoMemoryProvider()
    ctx.register_memory_provider(provider)

View File

@@ -1,7 +0,0 @@
name: honcho
version: 1.0.0
description: "Honcho AI-native memory — cross-session user modeling with dialectic Q&A, semantic search, and persistent conclusions."
pip_dependencies:
- honcho-ai
hooks:
- on_session_end

View File

@@ -1,38 +0,0 @@
# Mem0 Memory Provider
Server-side LLM fact extraction with semantic search, reranking, and automatic deduplication.
## Requirements
- `pip install mem0ai`
- Mem0 API key from [app.mem0.ai](https://app.mem0.ai)
## Setup
```bash
hermes memory setup # select "mem0"
```
Or manually:
```bash
hermes config set memory.provider mem0
echo "MEM0_API_KEY=your-key" >> ~/.hermes/.env
```
## Config
Config file: `$HERMES_HOME/mem0.json`
| Key | Default | Description |
|-----|---------|-------------|
| `user_id` | `hermes-user` | User identifier on Mem0 |
| `agent_id` | `hermes` | Agent identifier |
| `rerank` | `true` | Enable reranking for recall |
## Tools
| Tool | Description |
|------|-------------|
| `mem0_profile` | All stored memories about the user |
| `mem0_search` | Semantic search with optional reranking |
| `mem0_conclude` | Store a fact verbatim (no LLM extraction) |

View File

@@ -1,344 +0,0 @@
"""Mem0 memory plugin — MemoryProvider interface.
Server-side LLM fact extraction, semantic search with reranking, and
automatic deduplication via the Mem0 Platform API.
Original PR #2933 by kartik-mem0, adapted to MemoryProvider ABC.
Config via environment variables:
MEM0_API_KEY — Mem0 Platform API key (required)
MEM0_USER_ID — User identifier (default: hermes-user)
MEM0_AGENT_ID — Agent identifier (default: hermes)
Or via $HERMES_HOME/mem0.json.
"""
from __future__ import annotations
import json
import logging
import os
import threading
import time
from pathlib import Path
from typing import Any, Dict, List
from agent.memory_provider import MemoryProvider
logger = logging.getLogger(__name__)
# Circuit breaker: after this many consecutive failures, pause API calls
# for _BREAKER_COOLDOWN_SECS to avoid hammering a down server.
_BREAKER_THRESHOLD = 5
_BREAKER_COOLDOWN_SECS = 120
# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
def _load_config() -> dict:
    """Load config from $HERMES_HOME/mem0.json or env vars."""
    from hermes_constants import get_hermes_home

    cfg_file = get_hermes_home() / "mem0.json"
    if cfg_file.exists():
        try:
            return json.loads(cfg_file.read_text(encoding="utf-8"))
        except Exception:
            # Fall through to environment defaults on a corrupt file.
            pass
    env = os.environ
    return {
        "api_key": env.get("MEM0_API_KEY", ""),
        "user_id": env.get("MEM0_USER_ID", "hermes-user"),
        "agent_id": env.get("MEM0_AGENT_ID", "hermes"),
        "rerank": True,
        "keyword_search": False,
    }
# ---------------------------------------------------------------------------
# Tool schemas
# ---------------------------------------------------------------------------
PROFILE_SCHEMA = {
"name": "mem0_profile",
"description": (
"Retrieve all stored memories about the user — preferences, facts, "
"project context. Fast, no reranking. Use at conversation start."
),
"parameters": {"type": "object", "properties": {}, "required": []},
}
SEARCH_SCHEMA = {
"name": "mem0_search",
"description": (
"Search memories by meaning. Returns relevant facts ranked by similarity. "
"Set rerank=true for higher accuracy on important queries."
),
"parameters": {
"type": "object",
"properties": {
"query": {"type": "string", "description": "What to search for."},
"rerank": {"type": "boolean", "description": "Enable reranking for precision (default: false)."},
"top_k": {"type": "integer", "description": "Max results (default: 10, max: 50)."},
},
"required": ["query"],
},
}
CONCLUDE_SCHEMA = {
"name": "mem0_conclude",
"description": (
"Store a durable fact about the user. Stored verbatim (no LLM extraction). "
"Use for explicit preferences, corrections, or decisions."
),
"parameters": {
"type": "object",
"properties": {
"conclusion": {"type": "string", "description": "The fact to store."},
},
"required": ["conclusion"],
},
}
# ---------------------------------------------------------------------------
# MemoryProvider implementation
# ---------------------------------------------------------------------------
class Mem0MemoryProvider(MemoryProvider):
    """Mem0 Platform memory with server-side extraction and semantic search."""

    def __init__(self):
        self._config = None
        self._client = None
        # Guards lazy creation/teardown of the MemoryClient.
        self._client_lock = threading.Lock()
        self._api_key = ""
        self._user_id = "hermes-user"
        self._agent_id = "hermes"
        self._rerank = True
        # Latest background search result; handed over under _prefetch_lock.
        self._prefetch_result = ""
        self._prefetch_lock = threading.Lock()
        self._prefetch_thread = None
        self._sync_thread = None
        # Circuit breaker state
        # NOTE(review): mutated from worker threads without a lock —
        # presumably acceptable as best-effort counters; confirm.
        self._consecutive_failures = 0
        self._breaker_open_until = 0.0

    @property
    def name(self) -> str:
        """Provider identifier (value used for ``memory.provider``)."""
        return "mem0"

    def is_available(self) -> bool:
        """Configured iff an API key is present. No network calls."""
        cfg = _load_config()
        return bool(cfg.get("api_key"))

    def save_config(self, values, hermes_home):
        """Write config to $HERMES_HOME/mem0.json."""
        import json
        from pathlib import Path
        config_path = Path(hermes_home) / "mem0.json"
        existing = {}
        if config_path.exists():
            try:
                existing = json.loads(config_path.read_text())
            except Exception:
                # Unreadable/corrupt file: overwrite with the new values only.
                pass
        existing.update(values)
        config_path.write_text(json.dumps(existing, indent=2))

    def get_config_schema(self):
        """Describe the config keys the setup wizard should prompt for."""
        return [
            {"key": "api_key", "description": "Mem0 Platform API key", "secret": True, "required": True, "env_var": "MEM0_API_KEY", "url": "https://app.mem0.ai"},
            {"key": "user_id", "description": "User identifier", "default": "hermes-user"},
            {"key": "agent_id", "description": "Agent identifier", "default": "hermes"},
            {"key": "rerank", "description": "Enable reranking for recall", "default": "true", "choices": ["true", "false"]},
        ]

    def _get_client(self):
        """Thread-safe client accessor with lazy initialization."""
        with self._client_lock:
            if self._client is not None:
                return self._client
            try:
                from mem0 import MemoryClient
                self._client = MemoryClient(api_key=self._api_key)
                return self._client
            except ImportError:
                raise RuntimeError("mem0 package not installed. Run: pip install mem0ai")

    def _is_breaker_open(self) -> bool:
        """Return True if the circuit breaker is tripped (too many failures)."""
        if self._consecutive_failures < _BREAKER_THRESHOLD:
            return False
        if time.monotonic() >= self._breaker_open_until:
            # Cooldown expired — reset and allow a retry
            self._consecutive_failures = 0
            return False
        return True

    def _record_success(self):
        # Any success fully resets the breaker.
        self._consecutive_failures = 0

    def _record_failure(self):
        # Count the failure; trip the breaker once the threshold is reached.
        self._consecutive_failures += 1
        if self._consecutive_failures >= _BREAKER_THRESHOLD:
            self._breaker_open_until = time.monotonic() + _BREAKER_COOLDOWN_SECS
            logger.warning(
                "Mem0 circuit breaker tripped after %d consecutive failures. "
                "Pausing API calls for %ds.",
                self._consecutive_failures, _BREAKER_COOLDOWN_SECS,
            )

    def initialize(self, session_id: str, **kwargs) -> None:
        """Load config into instance fields; no network calls here."""
        self._config = _load_config()
        self._api_key = self._config.get("api_key", "")
        self._user_id = self._config.get("user_id", "hermes-user")
        self._agent_id = self._config.get("agent_id", "hermes")
        self._rerank = self._config.get("rerank", True)

    def system_prompt_block(self) -> str:
        """Return the prompt section advertising the Mem0 tools."""
        return (
            "# Mem0 Memory\n"
            f"Active. User: {self._user_id}.\n"
            "Use mem0_search to find memories, mem0_conclude to store facts, "
            "mem0_profile for a full overview."
        )

    def prefetch(self, query: str, *, session_id: str = "") -> str:
        """Return (and clear) the result of the last queued background search."""
        # Give an in-flight prefetch a short window to finish before reading.
        if self._prefetch_thread and self._prefetch_thread.is_alive():
            self._prefetch_thread.join(timeout=3.0)
        with self._prefetch_lock:
            result = self._prefetch_result
            self._prefetch_result = ""
        if not result:
            return ""
        return f"## Mem0 Memory\n{result}"

    def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
        """Kick off a background top-5 semantic search for the upcoming turn."""
        if self._is_breaker_open():
            return
        def _run():
            try:
                client = self._get_client()
                results = client.search(
                    query=query,
                    user_id=self._user_id,
                    rerank=self._rerank,
                    top_k=5,
                )
                if results:
                    lines = [r.get("memory", "") for r in results if r.get("memory")]
                    with self._prefetch_lock:
                        self._prefetch_result = "\n".join(f"- {l}" for l in lines)
                self._record_success()
            except Exception as e:
                self._record_failure()
                logger.debug("Mem0 prefetch failed: %s", e)
        self._prefetch_thread = threading.Thread(target=_run, daemon=True, name="mem0-prefetch")
        self._prefetch_thread.start()

    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
        """Send the turn to Mem0 for server-side fact extraction (non-blocking)."""
        if self._is_breaker_open():
            return
        def _sync():
            try:
                client = self._get_client()
                messages = [
                    {"role": "user", "content": user_content},
                    {"role": "assistant", "content": assistant_content},
                ]
                client.add(messages, user_id=self._user_id, agent_id=self._agent_id)
                self._record_success()
            except Exception as e:
                self._record_failure()
                logger.warning("Mem0 sync failed: %s", e)
        # Wait for any previous sync before starting a new one
        if self._sync_thread and self._sync_thread.is_alive():
            self._sync_thread.join(timeout=5.0)
        self._sync_thread = threading.Thread(target=_sync, daemon=True, name="mem0-sync")
        self._sync_thread.start()

    def get_tool_schemas(self) -> List[Dict[str, Any]]:
        """Expose the three Mem0 tools to the agent."""
        return [PROFILE_SCHEMA, SEARCH_SCHEMA, CONCLUDE_SCHEMA]

    def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str:
        """Dispatch a Mem0 tool invocation; always returns a JSON string."""
        if self._is_breaker_open():
            return json.dumps({
                "error": "Mem0 API temporarily unavailable (multiple consecutive failures). Will retry automatically."
            })
        try:
            client = self._get_client()
        except Exception as e:
            return json.dumps({"error": str(e)})
        if tool_name == "mem0_profile":
            try:
                memories = client.get_all(user_id=self._user_id)
                self._record_success()
                if not memories:
                    return json.dumps({"result": "No memories stored yet."})
                lines = [m.get("memory", "") for m in memories if m.get("memory")]
                return json.dumps({"result": "\n".join(lines), "count": len(lines)})
            except Exception as e:
                self._record_failure()
                return json.dumps({"error": f"Failed to fetch profile: {e}"})
        elif tool_name == "mem0_search":
            query = args.get("query", "")
            if not query:
                return json.dumps({"error": "Missing required parameter: query"})
            rerank = args.get("rerank", False)
            # Clamp results to the documented maximum of 50.
            top_k = min(int(args.get("top_k", 10)), 50)
            try:
                results = client.search(
                    query=query, user_id=self._user_id,
                    rerank=rerank, top_k=top_k,
                )
                self._record_success()
                if not results:
                    return json.dumps({"result": "No relevant memories found."})
                items = [{"memory": r.get("memory", ""), "score": r.get("score", 0)} for r in results]
                return json.dumps({"results": items, "count": len(items)})
            except Exception as e:
                self._record_failure()
                return json.dumps({"error": f"Search failed: {e}"})
        elif tool_name == "mem0_conclude":
            conclusion = args.get("conclusion", "")
            if not conclusion:
                return json.dumps({"error": "Missing required parameter: conclusion"})
            try:
                # infer=False stores the text verbatim (no LLM extraction).
                client.add(
                    [{"role": "user", "content": conclusion}],
                    user_id=self._user_id,
                    agent_id=self._agent_id,
                    infer=False,
                )
                self._record_success()
                return json.dumps({"result": "Fact stored."})
            except Exception as e:
                self._record_failure()
                return json.dumps({"error": f"Failed to store: {e}"})
        return json.dumps({"error": f"Unknown tool: {tool_name}"})

    def shutdown(self) -> None:
        """Join worker threads and drop the cached client."""
        for t in (self._prefetch_thread, self._sync_thread):
            if t and t.is_alive():
                t.join(timeout=5.0)
        with self._client_lock:
            self._client = None
def register(ctx) -> None:
    """Register Mem0 as a memory provider plugin."""
    provider = Mem0MemoryProvider()
    ctx.register_memory_provider(provider)

View File

@@ -1,5 +0,0 @@
name: mem0
version: 1.0.0
description: "Mem0 — server-side LLM fact extraction with semantic search, reranking, and automatic deduplication."
pip_dependencies:
- mem0ai

View File

@@ -1,40 +0,0 @@
# OpenViking Memory Provider
Context database by Volcengine (ByteDance) with filesystem-style knowledge hierarchy, tiered retrieval, and automatic memory extraction.
## Requirements
- `pip install openviking`
- OpenViking server running (`openviking-server`)
- Embedding + VLM model configured in `~/.openviking/ov.conf`
## Setup
```bash
hermes memory setup # select "openviking"
```
Or manually:
```bash
hermes config set memory.provider openviking
echo "OPENVIKING_ENDPOINT=http://localhost:1933" >> ~/.hermes/.env
```
## Config
All config via environment variables in `.env`:
| Env Var | Default | Description |
|---------|---------|-------------|
| `OPENVIKING_ENDPOINT` | `http://127.0.0.1:1933` | Server URL |
| `OPENVIKING_API_KEY` | (none) | API key (optional) |
## Tools
| Tool | Description |
|------|-------------|
| `viking_search` | Semantic search with fast/deep/auto modes |
| `viking_read` | Read content at a viking:// URI (abstract/overview/full) |
| `viking_browse` | Filesystem-style navigation (list/tree/stat) |
| `viking_remember` | Store a fact for extraction on session commit |
| `viking_add_resource` | Ingest URLs/docs into the knowledge base |

View File

@@ -1,582 +0,0 @@
"""OpenViking memory plugin — full bidirectional MemoryProvider interface.
Context database by Volcengine (ByteDance) that organizes agent knowledge
into a filesystem hierarchy (viking:// URIs) with tiered context loading,
automatic memory extraction, and session management.
Original PR #3369 by Mibayy, rewritten to use the full OpenViking session
lifecycle instead of read-only search endpoints.
Config via environment variables (profile-scoped via each profile's .env):
OPENVIKING_ENDPOINT — Server URL (default: http://127.0.0.1:1933)
OPENVIKING_API_KEY — API key (required for authenticated servers)
Capabilities:
- Automatic memory extraction on session commit (6 categories)
- Tiered context: L0 (~100 tokens), L1 (~2k), L2 (full)
- Semantic search with hierarchical directory retrieval
- Filesystem-style browsing via viking:// URIs
- Resource ingestion (URLs, docs, code)
"""
from __future__ import annotations
import json
import logging
import os
import threading
from typing import Any, Dict, List, Optional
from agent.memory_provider import MemoryProvider
logger = logging.getLogger(__name__)
_DEFAULT_ENDPOINT = "http://127.0.0.1:1933"
_TIMEOUT = 30.0
# ---------------------------------------------------------------------------
# HTTP helper — uses httpx to avoid requiring the openviking SDK
# ---------------------------------------------------------------------------
def _get_httpx():
"""Lazy import httpx."""
try:
import httpx
return httpx
except ImportError:
return None
class _VikingClient:
"""Thin HTTP client for the OpenViking REST API."""
def __init__(self, endpoint: str, api_key: str = ""):
self._endpoint = endpoint.rstrip("/")
self._api_key = api_key
self._httpx = _get_httpx()
if self._httpx is None:
raise ImportError("httpx is required for OpenViking: pip install httpx")
def _headers(self) -> dict:
h = {"Content-Type": "application/json"}
if self._api_key:
h["X-API-Key"] = self._api_key
return h
def _url(self, path: str) -> str:
return f"{self._endpoint}{path}"
def get(self, path: str, **kwargs) -> dict:
resp = self._httpx.get(
self._url(path), headers=self._headers(), timeout=_TIMEOUT, **kwargs
)
resp.raise_for_status()
return resp.json()
def post(self, path: str, payload: dict = None, **kwargs) -> dict:
resp = self._httpx.post(
self._url(path), json=payload or {}, headers=self._headers(),
timeout=_TIMEOUT, **kwargs
)
resp.raise_for_status()
return resp.json()
def health(self) -> bool:
try:
resp = self._httpx.get(
self._url("/health"), timeout=3.0
)
return resp.status_code == 200
except Exception:
return False
# ---------------------------------------------------------------------------
# Tool schemas
# ---------------------------------------------------------------------------
SEARCH_SCHEMA = {
"name": "viking_search",
"description": (
"Semantic search over the OpenViking knowledge base. "
"Returns ranked results with viking:// URIs for deeper reading. "
"Use mode='deep' for complex queries that need reasoning across "
"multiple sources, 'fast' for simple lookups."
),
"parameters": {
"type": "object",
"properties": {
"query": {"type": "string", "description": "Search query."},
"mode": {
"type": "string", "enum": ["auto", "fast", "deep"],
"description": "Search depth (default: auto).",
},
"scope": {
"type": "string",
"description": "Viking URI prefix to scope search (e.g. 'viking://resources/docs/').",
},
"limit": {"type": "integer", "description": "Max results (default: 10)."},
},
"required": ["query"],
},
}
READ_SCHEMA = {
"name": "viking_read",
"description": (
"Read content at a viking:// URI. Three detail levels:\n"
" abstract — ~100 token summary (L0)\n"
" overview — ~2k token key points (L1)\n"
" full — complete content (L2)\n"
"Start with abstract/overview, only use full when you need details."
),
"parameters": {
"type": "object",
"properties": {
"uri": {"type": "string", "description": "viking:// URI to read."},
"level": {
"type": "string", "enum": ["abstract", "overview", "full"],
"description": "Detail level (default: overview).",
},
},
"required": ["uri"],
},
}
BROWSE_SCHEMA = {
"name": "viking_browse",
"description": (
"Browse the OpenViking knowledge store like a filesystem.\n"
" list — show directory contents\n"
" tree — show hierarchy\n"
" stat — show metadata for a URI"
),
"parameters": {
"type": "object",
"properties": {
"action": {
"type": "string", "enum": ["tree", "list", "stat"],
"description": "Browse action.",
},
"path": {
"type": "string",
"description": "Viking URI path (default: viking://). Examples: 'viking://resources/', 'viking://user/memories/'.",
},
},
"required": ["action"],
},
}
REMEMBER_SCHEMA = {
"name": "viking_remember",
"description": (
"Explicitly store a fact or memory in the OpenViking knowledge base. "
"Use for important information the agent should remember long-term. "
"The system automatically categorizes and indexes the memory."
),
"parameters": {
"type": "object",
"properties": {
"content": {"type": "string", "description": "The information to remember."},
"category": {
"type": "string",
"enum": ["preference", "entity", "event", "case", "pattern"],
"description": "Memory category (default: auto-detected).",
},
},
"required": ["content"],
},
}
ADD_RESOURCE_SCHEMA = {
"name": "viking_add_resource",
"description": (
"Add a URL or document to the OpenViking knowledge base. "
"Supports web pages, GitHub repos, PDFs, markdown, code files. "
"The system automatically parses, indexes, and generates summaries."
),
"parameters": {
"type": "object",
"properties": {
"url": {"type": "string", "description": "URL or path of the resource to add."},
"reason": {
"type": "string",
"description": "Why this resource is relevant (improves search).",
},
},
"required": ["url"],
},
}
# ---------------------------------------------------------------------------
# MemoryProvider implementation
# ---------------------------------------------------------------------------
class OpenVikingMemoryProvider(MemoryProvider):
    """Full bidirectional memory via OpenViking context database.

    Reads (viking_search / viking_read / viking_browse tools plus turn
    prefetch) and writes (turn syncing, explicit remembers, resource
    ingestion) all go through a small HTTP client. Network work runs on
    daemon threads so the agent loop never blocks; ``shutdown`` joins any
    stragglers. A ``None`` client means the plugin is disabled.
    """

    def __init__(self):
        # Connection state — populated by initialize().
        self._client: Optional[_VikingClient] = None
        self._endpoint = ""
        self._api_key = ""
        self._session_id = ""
        self._turn_count = 0
        # Background workers: one for turn syncing, one for search prefetch.
        self._sync_thread: Optional[threading.Thread] = None
        self._prefetch_result = ""
        self._prefetch_lock = threading.Lock()
        self._prefetch_thread: Optional[threading.Thread] = None

    @property
    def name(self) -> str:
        return "openviking"

    def is_available(self) -> bool:
        """Check if OpenViking endpoint is configured. No network calls."""
        return bool(os.environ.get("OPENVIKING_ENDPOINT"))

    def get_config_schema(self):
        """Describe the env-var-driven configuration keys for this provider."""
        return [
            {
                "key": "endpoint",
                "description": "OpenViking server URL",
                "required": True,
                "default": _DEFAULT_ENDPOINT,
                "env_var": "OPENVIKING_ENDPOINT",
            },
            {
                "key": "api_key",
                "description": "OpenViking API key",
                "secret": True,
                "env_var": "OPENVIKING_API_KEY",
            },
        ]

    def initialize(self, session_id: str, **kwargs) -> None:
        """Connect to the OpenViking server; disable the plugin on failure."""
        self._endpoint = os.environ.get("OPENVIKING_ENDPOINT", _DEFAULT_ENDPOINT)
        self._api_key = os.environ.get("OPENVIKING_API_KEY", "")
        self._session_id = session_id
        self._turn_count = 0
        try:
            self._client = _VikingClient(self._endpoint, self._api_key)
            # Probe reachability once up front so later tool calls fail fast.
            if not self._client.health():
                logger.warning("OpenViking server at %s is not reachable", self._endpoint)
                self._client = None
        except ImportError:
            logger.warning("httpx not installed — OpenViking plugin disabled")
            self._client = None

    def system_prompt_block(self) -> str:
        """Return a short system-prompt section advertising the tools.

        Returns "" when the plugin is disabled or the store is empty.
        """
        if not self._client:
            return ""
        # Provide brief info about the knowledge base
        try:
            # Check what's in the knowledge base via a root listing
            resp = self._client.post("/api/v1/browse", {"action": "stat", "path": "viking://"})
            result = resp.get("result", {})
            children = result.get("children", 0)
            if children == 0:
                return ""
            return (
                "# OpenViking Knowledge Base\n"
                f"Active. Endpoint: {self._endpoint}\n"
                "Use viking_search to find information, viking_read for details "
                "(abstract/overview/full), viking_browse to explore.\n"
                "Use viking_remember to store facts, viking_add_resource to index URLs/docs."
            )
        except Exception:
            # Stat failed (e.g. transient network error) — still advertise
            # the tools rather than silently hiding the knowledge base.
            return (
                "# OpenViking Knowledge Base\n"
                f"Active. Endpoint: {self._endpoint}\n"
                "Use viking_search, viking_read, viking_browse, "
                "viking_remember, viking_add_resource."
            )

    def prefetch(self, query: str, *, session_id: str = "") -> str:
        """Return prefetched results from the background thread."""
        # Give an in-flight prefetch a short grace period to finish.
        if self._prefetch_thread and self._prefetch_thread.is_alive():
            self._prefetch_thread.join(timeout=3.0)
        with self._prefetch_lock:
            result = self._prefetch_result
            self._prefetch_result = ""  # consume once
        if not result:
            return ""
        return f"## OpenViking Context\n{result}"

    def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
        """Fire a background search to pre-load relevant context."""
        if not self._client or not query:
            return
        def _run():
            try:
                # Fresh client per thread — avoids sharing connections
                # across threads.
                client = _VikingClient(self._endpoint, self._api_key)
                resp = client.post("/api/v1/search/find", {
                    "query": query,
                    "top_k": 5,
                })
                result = resp.get("result", {})
                parts = []
                for ctx_type in ("memories", "resources"):
                    items = result.get(ctx_type, [])
                    for item in items[:3]:
                        uri = item.get("uri", "")
                        abstract = item.get("abstract", "")
                        score = item.get("score", 0)
                        if abstract:
                            parts.append(f"- [{score:.2f}] {abstract} ({uri})")
                if parts:
                    with self._prefetch_lock:
                        self._prefetch_result = "\n".join(parts)
            except Exception as e:
                logger.debug("OpenViking prefetch failed: %s", e)
        self._prefetch_thread = threading.Thread(
            target=_run, daemon=True, name="openviking-prefetch"
        )
        self._prefetch_thread.start()

    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
        """Record the conversation turn in OpenViking's session (non-blocking)."""
        if not self._client:
            return
        self._turn_count += 1
        def _sync():
            try:
                client = _VikingClient(self._endpoint, self._api_key)
                sid = self._session_id
                # Add user message
                client.post(f"/api/v1/sessions/{sid}/messages", {
                    "role": "user",
                    "content": user_content[:4000],  # trim very long messages
                })
                # Add assistant message
                client.post(f"/api/v1/sessions/{sid}/messages", {
                    "role": "assistant",
                    "content": assistant_content[:4000],
                })
            except Exception as e:
                logger.debug("OpenViking sync_turn failed: %s", e)
        # Wait for any previous sync to finish before starting a new one
        # so messages land on the server in conversation order.
        if self._sync_thread and self._sync_thread.is_alive():
            self._sync_thread.join(timeout=5.0)
        self._sync_thread = threading.Thread(
            target=_sync, daemon=True, name="openviking-sync"
        )
        self._sync_thread.start()

    def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
        """Commit the session to trigger memory extraction.

        OpenViking automatically extracts 6 categories of memories:
        profile, preferences, entities, events, cases, and patterns.
        """
        if not self._client or self._turn_count == 0:
            return
        # Wait for any pending sync to finish first
        if self._sync_thread and self._sync_thread.is_alive():
            self._sync_thread.join(timeout=10.0)
        try:
            self._client.post(f"/api/v1/sessions/{self._session_id}/commit")
            logger.info("OpenViking session %s committed (%d turns)", self._session_id, self._turn_count)
        except Exception as e:
            logger.warning("OpenViking session commit failed: %s", e)

    def on_memory_write(self, action: str, target: str, content: str) -> None:
        """Mirror built-in memory writes to OpenViking as explicit memories."""
        if not self._client or action != "add" or not content:
            return
        def _write():
            try:
                client = _VikingClient(self._endpoint, self._api_key)
                # Add as a user message with memory context so the commit
                # picks it up as an explicit memory during extraction
                client.post(f"/api/v1/sessions/{self._session_id}/messages", {
                    "role": "user",
                    "parts": [
                        {"type": "text", "text": f"[Memory note — {target}] {content}"},
                    ],
                })
            except Exception as e:
                logger.debug("OpenViking memory mirror failed: %s", e)
        t = threading.Thread(target=_write, daemon=True, name="openviking-memwrite")
        t.start()

    def get_tool_schemas(self) -> List[Dict[str, Any]]:
        """Expose the five viking_* tool schemas to the model."""
        return [SEARCH_SCHEMA, READ_SCHEMA, BROWSE_SCHEMA, REMEMBER_SCHEMA, ADD_RESOURCE_SCHEMA]

    def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str:
        """Dispatch a viking_* tool call; always returns a JSON string."""
        if not self._client:
            return json.dumps({"error": "OpenViking server not connected"})
        try:
            if tool_name == "viking_search":
                return self._tool_search(args)
            elif tool_name == "viking_read":
                return self._tool_read(args)
            elif tool_name == "viking_browse":
                return self._tool_browse(args)
            elif tool_name == "viking_remember":
                return self._tool_remember(args)
            elif tool_name == "viking_add_resource":
                return self._tool_add_resource(args)
            return json.dumps({"error": f"Unknown tool: {tool_name}"})
        except Exception as e:
            return json.dumps({"error": str(e)})

    def shutdown(self) -> None:
        """Wait briefly for background threads so pending writes land."""
        for t in (self._sync_thread, self._prefetch_thread):
            if t and t.is_alive():
                t.join(timeout=5.0)

    # -- Tool implementations ------------------------------------------------

    def _tool_search(self, args: dict) -> str:
        """Run a search and return compact JSON results for the model."""
        query = args.get("query", "")
        if not query:
            return json.dumps({"error": "query is required"})
        payload: Dict[str, Any] = {"query": query}
        mode = args.get("mode", "auto")
        if mode != "auto":
            payload["mode"] = mode
        if args.get("scope"):
            payload["target_uri"] = args["scope"]
        if args.get("limit"):
            payload["top_k"] = args["limit"]
        resp = self._client.post("/api/v1/search/find", payload)
        result = resp.get("result", {})
        # Singular labels for each result bucket. Note str.rstrip("s") is
        # wrong here: it strips a character *set*, and even as a suffix
        # strip would turn "memories" into "memorie".
        singular = {"memories": "memory", "resources": "resource", "skills": "skill"}
        # Format results for the model — keep it concise
        formatted = []
        for ctx_type in ("memories", "resources", "skills"):
            items = result.get(ctx_type, [])
            for item in items:
                entry = {
                    "uri": item.get("uri", ""),
                    "type": singular[ctx_type],
                    "score": round(item.get("score", 0), 3),
                    "abstract": item.get("abstract", ""),
                }
                if item.get("relations"):
                    entry["related"] = [r.get("uri") for r in item["relations"][:3]]
                formatted.append(entry)
        return json.dumps({
            "results": formatted,
            "total": result.get("total", len(formatted)),
        }, ensure_ascii=False)

    def _tool_read(self, args: dict) -> str:
        """Read a URI at the requested detail level (abstract/overview/full)."""
        uri = args.get("uri", "")
        if not uri:
            return json.dumps({"error": "uri is required"})
        level = args.get("level", "overview")
        # Map our level names to OpenViking endpoints
        if level == "abstract":
            resp = self._client.post("/api/v1/read/abstract", {"uri": uri})
        elif level == "full":
            resp = self._client.post("/api/v1/read", {"uri": uri, "level": "read"})
        else:  # overview
            resp = self._client.post("/api/v1/read", {"uri": uri, "level": "overview"})
        result = resp.get("result", {})
        content = result.get("content", "")
        # Truncate very long content to avoid flooding the context
        if len(content) > 8000:
            content = content[:8000] + "\n\n[... truncated, use a more specific URI or abstract level]"
        return json.dumps({
            "uri": uri,
            "level": level,
            "content": content,
        }, ensure_ascii=False)

    def _tool_browse(self, args: dict) -> str:
        """List/tree/stat a viking:// path and return JSON."""
        action = args.get("action", "list")
        path = args.get("path", "viking://")
        resp = self._client.post("/api/v1/browse", {
            "action": action,
            "path": path,
        })
        result = resp.get("result", {})
        # Format for readability
        if action == "list" and "entries" in result:
            entries = []
            for e in result["entries"][:50]:  # cap at 50 entries
                entries.append({
                    "name": e.get("name", ""),
                    "uri": e.get("uri", ""),
                    "type": "dir" if e.get("is_dir") else "file",
                })
            return json.dumps({"path": path, "entries": entries}, ensure_ascii=False)
        return json.dumps(result, ensure_ascii=False)

    def _tool_remember(self, args: dict) -> str:
        """Queue an explicit memory; extraction happens at session commit."""
        content = args.get("content", "")
        if not content:
            return json.dumps({"error": "content is required"})
        # Store as a session message that will be extracted during commit.
        # The category hint helps OpenViking's extraction classify correctly.
        category = args.get("category", "")
        text = f"[Remember] {content}"
        if category:
            text = f"[Remember — {category}] {content}"
        self._client.post(f"/api/v1/sessions/{self._session_id}/messages", {
            "role": "user",
            "parts": [
                {"type": "text", "text": text},
            ],
        })
        return json.dumps({
            "status": "stored",
            "message": "Memory recorded. Will be extracted and indexed on session commit.",
        })

    def _tool_add_resource(self, args: dict) -> str:
        """Queue a URL/document for ingestion into the knowledge base."""
        url = args.get("url", "")
        if not url:
            return json.dumps({"error": "url is required"})
        payload: Dict[str, Any] = {"path": url}
        if args.get("reason"):
            payload["reason"] = args["reason"]
        resp = self._client.post("/api/v1/resources", payload)
        result = resp.get("result", {})
        return json.dumps({
            "status": "added",
            "root_uri": result.get("root_uri", ""),
            "message": "Resource queued for processing. Use viking_search after a moment to find it.",
        }, ensure_ascii=False)
# ---------------------------------------------------------------------------
# Plugin entry point
# ---------------------------------------------------------------------------
def register(ctx) -> None:
    """Plugin entry point: attach the OpenViking memory provider to *ctx*."""
    provider = OpenVikingMemoryProvider()
    ctx.register_memory_provider(provider)

View File

@@ -1,9 +0,0 @@
name: openviking
version: 2.0.0
description: "OpenViking context database — session-managed memory with automatic extraction, tiered retrieval, and filesystem-style knowledge browsing."
pip_dependencies:
- httpx
requires_env:
- OPENVIKING_ENDPOINT
hooks:
- on_session_end

View File

@@ -1,40 +0,0 @@
# RetainDB Memory Provider
Cloud memory API with hybrid search (Vector + BM25 + Reranking) and 7 memory types.
## Requirements
- RetainDB account ($20/month) from [retaindb.com](https://www.retaindb.com)
- `pip install requests`
## Setup
```bash
hermes memory setup # select "retaindb"
```
Or manually:
```bash
hermes config set memory.provider retaindb
echo "RETAINDB_API_KEY=your-key" >> ~/.hermes/.env
```
## Config
All config via environment variables in `.env`:
| Env Var | Default | Description |
|---------|---------|-------------|
| `RETAINDB_API_KEY` | (required) | API key |
| `RETAINDB_BASE_URL` | `https://api.retaindb.com` | API endpoint |
| `RETAINDB_PROJECT` | auto (profile-scoped) | Project identifier |
## Tools
| Tool | Description |
|------|-------------|
| `retaindb_profile` | User's stable profile |
| `retaindb_search` | Semantic search |
| `retaindb_context` | Task-relevant context |
| `retaindb_remember` | Store a fact with type + importance |
| `retaindb_forget` | Delete a memory by ID |

View File

@@ -1,302 +0,0 @@
"""RetainDB memory plugin — MemoryProvider interface.
Cross-session memory via RetainDB cloud API. Durable write-behind queue,
semantic search with deduplication, and user profile retrieval.
Original PR #2732 by Alinxus, adapted to MemoryProvider ABC.
Config via environment variables:
RETAINDB_API_KEY — API key (required)
RETAINDB_BASE_URL — API endpoint (default: https://api.retaindb.com)
RETAINDB_PROJECT — Project identifier (default: hermes)
"""
from __future__ import annotations
import json
import logging
import os
import threading
from typing import Any, Dict, List
from agent.memory_provider import MemoryProvider
logger = logging.getLogger(__name__)
_DEFAULT_BASE_URL = "https://api.retaindb.com"
# ---------------------------------------------------------------------------
# Tool schemas
# ---------------------------------------------------------------------------
# Tool schema: fetch the user's stable profile. Takes no arguments.
PROFILE_SCHEMA = {
    "name": "retaindb_profile",
    "description": "Get the user's stable profile — preferences, facts, and patterns.",
    "parameters": {
        "type": "object",
        "properties": {},
        "required": [],
    },
}
# Tool schema: ranked semantic search over stored memories.
SEARCH_SCHEMA = {
    "name": "retaindb_search",
    "description": "Semantic search across stored memories. Returns ranked results with relevance scores.",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "What to search for.",
            },
            "top_k": {
                "type": "integer",
                "description": "Max results (default: 8, max: 20).",
            },
        },
        "required": ["query"],
    },
}
# Tool schema: synthesized task-relevant context block.
CONTEXT_SCHEMA = {
    "name": "retaindb_context",
    "description": "Synthesized 'what matters now' context block for the current task.",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "Current task or question.",
            },
        },
        "required": ["query"],
    },
}
# Tool schema: explicit long-term memory write with type and importance.
REMEMBER_SCHEMA = {
    "name": "retaindb_remember",
    "description": "Persist an explicit fact or preference to long-term memory.",
    "parameters": {
        "type": "object",
        "properties": {
            # The fact itself — the only required field.
            "content": {"type": "string", "description": "The fact to remember."},
            "memory_type": {
                "type": "string",
                "enum": ["preference", "fact", "decision", "context"],
                "description": "Category (default: fact).",
            },
            # Retrieval weight hint.
            "importance": {
                "type": "number",
                "description": "Importance 0-1 (default: 0.5).",
            },
        },
        "required": ["content"],
    },
}
# Tool schema: targeted deletion of a single memory.
FORGET_SCHEMA = {
    "name": "retaindb_forget",
    "description": "Delete a specific memory by ID.",
    "parameters": {
        "type": "object",
        "properties": {
            "memory_id": {
                "type": "string",
                "description": "Memory ID to delete.",
            },
        },
        "required": ["memory_id"],
    },
}
# ---------------------------------------------------------------------------
# MemoryProvider implementation
# ---------------------------------------------------------------------------
class RetainDBMemoryProvider(MemoryProvider):
    """RetainDB cloud memory with write-behind queue and semantic search.

    All persistence goes through the RetainDB HTTP API (via ``requests``).
    Turn syncing and prefetch run on daemon threads so the agent loop never
    blocks on the network; ``shutdown`` joins any stragglers.
    """

    def __init__(self):
        # Connection/config state — populated by initialize().
        self._api_key = ""
        self._base_url = _DEFAULT_BASE_URL
        self._project = "hermes"
        self._user_id = ""
        # Defined here so sync_turn()/handle_tool_call() never hit an
        # AttributeError if invoked before initialize().
        self._session_id = ""
        # Background workers: prefetch and write-behind turn sync.
        self._prefetch_result = ""
        self._prefetch_lock = threading.Lock()
        self._prefetch_thread = None
        self._sync_thread = None

    @property
    def name(self) -> str:
        return "retaindb"

    def is_available(self) -> bool:
        """Available whenever an API key is configured. No network calls."""
        return bool(os.environ.get("RETAINDB_API_KEY"))

    def get_config_schema(self):
        """Describe the env-var-driven configuration keys for this provider."""
        return [
            {"key": "api_key", "description": "RetainDB API key", "secret": True, "required": True, "env_var": "RETAINDB_API_KEY", "url": "https://retaindb.com"},
            {"key": "base_url", "description": "API endpoint", "default": "https://api.retaindb.com"},
            {"key": "project", "description": "Project identifier", "default": "hermes"},
        ]

    def _headers(self) -> dict:
        """Bearer-auth JSON headers for every API request."""
        return {
            "Authorization": f"Bearer {self._api_key}",
            "Content-Type": "application/json",
        }

    def _api(self, method: str, path: str, **kwargs):
        """Make an API call to RetainDB.

        Raises requests.HTTPError on non-2xx responses; returns parsed JSON.
        """
        import requests
        url = f"{self._base_url}{path}"
        resp = requests.request(method, url, headers=self._headers(), timeout=30, **kwargs)
        resp.raise_for_status()
        return resp.json()

    def initialize(self, session_id: str, **kwargs) -> None:
        """Load config from the environment and derive the project name."""
        self._api_key = os.environ.get("RETAINDB_API_KEY", "")
        self._base_url = os.environ.get("RETAINDB_BASE_URL", _DEFAULT_BASE_URL)
        self._user_id = kwargs.get("user_id", "default")
        self._session_id = session_id
        # Derive profile-scoped project name so different profiles don't
        # share server-side memory. Explicit RETAINDB_PROJECT always wins.
        explicit_project = os.environ.get("RETAINDB_PROJECT")
        if explicit_project:
            self._project = explicit_project
        else:
            hermes_home = kwargs.get("hermes_home", "")
            profile_name = os.path.basename(hermes_home) if hermes_home else ""
            # Default profile (~/.hermes) → "hermes"; named profiles → "hermes-<name>"
            if profile_name and profile_name != ".hermes":
                self._project = f"hermes-{profile_name}"
            else:
                self._project = "hermes"

    def system_prompt_block(self) -> str:
        """Return the system-prompt section advertising the retaindb_* tools."""
        return (
            "# RetainDB Memory\n"
            f"Active. Project: {self._project}.\n"
            "Use retaindb_search to find memories, retaindb_remember to store facts, "
            "retaindb_profile for a user overview, retaindb_context for task-relevant context."
        )

    def prefetch(self, query: str, *, session_id: str = "") -> str:
        """Return (and consume) results gathered by queue_prefetch()."""
        # Give an in-flight prefetch a short grace period to finish.
        if self._prefetch_thread and self._prefetch_thread.is_alive():
            self._prefetch_thread.join(timeout=3.0)
        with self._prefetch_lock:
            result = self._prefetch_result
            self._prefetch_result = ""  # consume once
        if not result:
            return ""
        return f"## RetainDB Memory\n{result}"

    def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
        """Fire a background recall to pre-load relevant memories."""
        # Don't spawn a network thread when unconfigured or queryless
        # (mirrors the guard the OpenViking provider uses).
        if not self._api_key or not query:
            return
        def _run():
            try:
                data = self._api("POST", "/v1/recall", json={
                    "project": self._project,
                    "query": query,
                    "user_id": self._user_id,
                    "top_k": 5,
                })
                results = data.get("results", [])
                if results:
                    lines = [r.get("content", "") for r in results if r.get("content")]
                    with self._prefetch_lock:
                        self._prefetch_result = "\n".join(f"- {l}" for l in lines)
            except Exception as e:
                logger.debug("RetainDB prefetch failed: %s", e)
        self._prefetch_thread = threading.Thread(target=_run, daemon=True, name="retaindb-prefetch")
        self._prefetch_thread.start()

    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
        """Ingest conversation turn in background (non-blocking)."""
        if not self._api_key:
            return
        def _sync():
            try:
                self._api("POST", "/v1/ingest", json={
                    "project": self._project,
                    "user_id": self._user_id,
                    "session_id": self._session_id,
                    "messages": [
                        {"role": "user", "content": user_content},
                        {"role": "assistant", "content": assistant_content},
                    ],
                })
            except Exception as e:
                logger.warning("RetainDB sync failed: %s", e)
        # Serialize syncs so turns arrive at the server in order.
        if self._sync_thread and self._sync_thread.is_alive():
            self._sync_thread.join(timeout=5.0)
        self._sync_thread = threading.Thread(target=_sync, daemon=True, name="retaindb-sync")
        self._sync_thread.start()

    def get_tool_schemas(self) -> List[Dict[str, Any]]:
        """Expose the five retaindb_* tool schemas to the model."""
        return [PROFILE_SCHEMA, SEARCH_SCHEMA, CONTEXT_SCHEMA, REMEMBER_SCHEMA, FORGET_SCHEMA]

    def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str:
        """Dispatch a retaindb_* tool call; always returns a JSON string."""
        try:
            if tool_name == "retaindb_profile":
                data = self._api("GET", f"/v1/profile/{self._project}/{self._user_id}")
                return json.dumps(data)
            elif tool_name == "retaindb_search":
                query = args.get("query", "")
                if not query:
                    return json.dumps({"error": "query is required"})
                data = self._api("POST", "/v1/search", json={
                    "project": self._project,
                    "user_id": self._user_id,
                    "query": query,
                    "top_k": min(int(args.get("top_k", 8)), 20),  # server cap
                })
                return json.dumps(data)
            elif tool_name == "retaindb_context":
                query = args.get("query", "")
                if not query:
                    return json.dumps({"error": "query is required"})
                data = self._api("POST", "/v1/recall", json={
                    "project": self._project,
                    "user_id": self._user_id,
                    "query": query,
                    "top_k": 5,
                })
                return json.dumps(data)
            elif tool_name == "retaindb_remember":
                content = args.get("content", "")
                if not content:
                    return json.dumps({"error": "content is required"})
                data = self._api("POST", "/v1/remember", json={
                    "project": self._project,
                    "user_id": self._user_id,
                    "content": content,
                    "memory_type": args.get("memory_type", "fact"),
                    "importance": float(args.get("importance", 0.5)),
                })
                return json.dumps(data)
            elif tool_name == "retaindb_forget":
                memory_id = args.get("memory_id", "")
                if not memory_id:
                    return json.dumps({"error": "memory_id is required"})
                data = self._api("DELETE", f"/v1/memory/{memory_id}")
                return json.dumps(data)
            return json.dumps({"error": f"Unknown tool: {tool_name}"})
        except Exception as e:
            return json.dumps({"error": str(e)})

    def on_memory_write(self, action: str, target: str, content: str) -> None:
        """Mirror built-in memory additions to RetainDB (best-effort)."""
        if action == "add":
            try:
                self._api("POST", "/v1/remember", json={
                    "project": self._project,
                    "user_id": self._user_id,
                    "content": content,
                    "memory_type": "preference" if target == "user" else "fact",
                })
            except Exception as e:
                logger.debug("RetainDB memory bridge failed: %s", e)

    def shutdown(self) -> None:
        """Wait briefly for background threads so pending writes land."""
        for t in (self._prefetch_thread, self._sync_thread):
            if t and t.is_alive():
                t.join(timeout=5.0)
def register(ctx) -> None:
    """Plugin entry point: attach the RetainDB memory provider to *ctx*."""
    provider = RetainDBMemoryProvider()
    ctx.register_memory_provider(provider)

View File

@@ -1,7 +0,0 @@
name: retaindb
version: 1.0.0
description: "RetainDB — cloud memory API with hybrid search and 7 memory types."
pip_dependencies:
- requests
requires_env:
- RETAINDB_API_KEY

View File

@@ -39,7 +39,7 @@ dependencies = [
[project.optional-dependencies]
modal = ["modal>=1.0.0,<2"]
daytona = ["daytona>=0.148.0,<1"]
dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2"]
dev = ["pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2"]
messaging = ["python-telegram-bot>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
cron = ["croniter>=6.0.0,<7"]
slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
@@ -76,10 +76,7 @@ all = [
"hermes-agent[modal]",
"hermes-agent[daytona]",
"hermes-agent[messaging]",
# matrix excluded: python-olm (required by matrix-nio[e2e]) is upstream-broken
# on modern macOS (archived libolm, C++ errors with Clang 21+). Including it
# here causes the entire [all] install to fail, dropping all other extras.
# Users who need Matrix can install manually: pip install 'hermes-agent[matrix]'
"hermes-agent[matrix]",
"hermes-agent[cron]",
"hermes-agent[cli]",
"hermes-agent[dev]",
@@ -105,7 +102,7 @@ hermes-acp = "acp_adapter.entry:main"
py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "rl_cli", "utils"]
[tool.setuptools.packages.find]
include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"]
include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "cron", "honcho_integration", "acp_adapter"]
[tool.pytest.ini_options]
testpaths = ["tests"]

View File

@@ -15,7 +15,6 @@ requests
jinja2
pydantic>=2.0
PyJWT[crypto]
debugpy
# Web tools
firecrawl-py

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,940 @@
---
name: ml-paper-writing
description: Write publication-ready ML/AI papers for NeurIPS, ICML, ICLR, ACL, AAAI, COLM. Use when drafting papers from research repos, structuring arguments, verifying citations, or preparing camera-ready submissions. Includes LaTeX templates, reviewer guidelines, and citation verification workflows.
version: 1.0.0
author: Orchestra Research
license: MIT
dependencies: [semanticscholar, arxiv, habanero, requests]
metadata:
hermes:
tags: [Academic Writing, NeurIPS, ICML, ICLR, ACL, AAAI, COLM, LaTeX, Paper Writing, Citations, Research]
---
# ML Paper Writing for Top AI Conferences
Expert-level guidance for writing publication-ready papers targeting **NeurIPS, ICML, ICLR, ACL, AAAI, and COLM**. This skill combines writing philosophy from top researchers (Nanda, Farquhar, Karpathy, Lipton, Steinhardt) with practical tools: LaTeX templates, citation verification APIs, and conference checklists.
## Core Philosophy: Collaborative Writing
**Paper writing is collaborative, but Claude should be proactive in delivering drafts.**
The typical workflow starts with a research repository containing code, results, and experimental artifacts. Claude's role is to:
1. **Understand the project** by exploring the repo, results, and existing documentation
2. **Deliver a complete first draft** when confident about the contribution
3. **Search literature** using web search and APIs to find relevant citations
4. **Refine through feedback cycles** when the scientist provides input
5. **Ask for clarification** only when genuinely uncertain about key decisions
**Key Principle**: Be proactive. If the repo and results are clear, deliver a full draft. Don't block waiting for feedback on every section—scientists are busy. Produce something concrete they can react to, then iterate based on their response.
---
## ⚠️ CRITICAL: Never Hallucinate Citations
**This is the most important rule in academic writing with AI assistance.**
### The Problem
AI-generated citations have a **~40% error rate**. Hallucinated references—papers that don't exist, wrong authors, incorrect years, fabricated DOIs—are a serious form of academic misconduct that can result in desk rejection or retraction.
### The Rule
**NEVER generate BibTeX entries from memory. ALWAYS fetch programmatically.**
| Action | ✅ Correct | ❌ Wrong |
|--------|-----------|----------|
| Adding a citation | Search API → verify → fetch BibTeX | Write BibTeX from memory |
| Uncertain about a paper | Mark as `[CITATION NEEDED]` | Guess the reference |
| Can't find exact paper | Note: "placeholder - verify" | Invent similar-sounding paper |
### When You Can't Verify a Citation
If you cannot programmatically verify a citation, you MUST:
```latex
% EXPLICIT PLACEHOLDER - requires human verification
\cite{PLACEHOLDER_author2024_verify_this} % TODO: Verify this citation exists
```
**Always tell the scientist**: "I've marked [X] citations as placeholders that need verification. I could not confirm these papers exist."
### Recommended: Install Exa MCP for Paper Search
For the best paper search experience, install **Exa MCP** which provides real-time academic search:
**Claude Code:**
```bash
claude mcp add exa -- npx -y mcp-remote "https://mcp.exa.ai/mcp"
```
**Cursor / VS Code** (add to MCP settings):
```json
{
"mcpServers": {
"exa": {
"type": "http",
"url": "https://mcp.exa.ai/mcp"
}
}
}
```
Exa MCP enables searches like:
- "Find papers on RLHF for language models published after 2023"
- "Search for transformer architecture papers by Vaswani"
- "Get recent work on sparse autoencoders for interpretability"
Then verify results with Semantic Scholar API and fetch BibTeX via DOI.
---
## Workflow 0: Starting from a Research Repository
When beginning paper writing, start by understanding the project:
```
Project Understanding:
- [ ] Step 1: Explore the repository structure
- [ ] Step 2: Read README, existing docs, and key results
- [ ] Step 3: Identify the main contribution with the scientist
- [ ] Step 4: Find papers already cited in the codebase
- [ ] Step 5: Search for additional relevant literature
- [ ] Step 6: Outline the paper structure together
- [ ] Step 7: Draft sections iteratively with feedback
```
**Step 1: Explore the Repository**
```bash
# Understand project structure
ls -la
find . -name "*.py" | head -20
find . -name "*.md" -o -name "*.txt" | xargs grep -l -i "result\|conclusion\|finding"
```
Look for:
- `README.md` - Project overview and claims
- `results/`, `outputs/`, `experiments/` - Key findings
- `configs/` - Experimental settings
- Existing `.bib` files or citation references
- Any draft documents or notes
**Step 2: Identify Existing Citations**
Check for papers already referenced in the codebase:
```bash
# Find existing citations
grep -r "arxiv\|doi\|cite" --include="*.md" --include="*.bib" --include="*.py"
find . -name "*.bib"
```
These are high-signal starting points for Related Work—the scientist has already deemed them relevant.
**Step 3: Clarify the Contribution**
Before writing, explicitly confirm with the scientist:
> "Based on my understanding of the repo, the main contribution appears to be [X].
> The key results show [Y]. Is this the framing you want for the paper,
> or should we emphasize different aspects?"
**Never assume the narrative—always verify with the human.**
**Step 4: Search for Additional Literature**
Use web search to find relevant papers:
```
Search queries to try:
- "[main technique] + [application domain]"
- "[baseline method] comparison"
- "[problem name] state-of-the-art"
- Author names from existing citations
```
Then verify and retrieve BibTeX using the citation workflow below.
**Step 5: Deliver a First Draft**
**Be proactive—deliver a complete draft rather than asking permission for each section.**
If the repo provides clear results and the contribution is apparent:
1. Write the full first draft end-to-end
2. Present the complete draft for feedback
3. Iterate based on scientist's response
If genuinely uncertain about framing or major claims:
1. Draft what you can confidently
2. Flag specific uncertainties: "I framed X as the main contribution—let me know if you'd prefer to emphasize Y instead"
3. Continue with the draft rather than blocking
**Questions to include with the draft** (not before):
- "I emphasized X as the main contribution—adjust if needed"
- "I highlighted results A, B, C—let me know if others are more important"
- "Related work section includes [papers]—add any I missed"
---
## When to Use This Skill
Use this skill when:
- **Starting from a research repo** to write a paper
- **Drafting or revising** specific sections
- **Finding and verifying citations** for related work
- **Formatting** for conference submission
- **Resubmitting** to a different venue (format conversion)
- **Iterating** on drafts with scientist feedback
**Always remember**: First drafts are starting points for discussion, not final outputs.
---
## Balancing Proactivity and Collaboration
**Default: Be proactive. Deliver drafts, then iterate.**
| Confidence Level | Action |
|-----------------|--------|
| **High** (clear repo, obvious contribution) | Write full draft, deliver, iterate on feedback |
| **Medium** (some ambiguity) | Write draft with flagged uncertainties, continue |
| **Low** (major unknowns) | Ask 1-2 targeted questions, then draft |
**Draft first, ask with the draft** (not before):
| Section | Draft Autonomously | Flag With Draft |
|---------|-------------------|-----------------|
| Abstract | Yes | "Framed contribution as X—adjust if needed" |
| Introduction | Yes | "Emphasized problem Y—correct if wrong" |
| Methods | Yes | "Included details A, B, C—add missing pieces" |
| Experiments | Yes | "Highlighted results 1, 2, 3—reorder if needed" |
| Related Work | Yes | "Cited papers X, Y, Z—add any I missed" |
**Only block for input when:**
- Target venue is unclear (affects page limits, framing)
- Multiple contradictory framings seem equally valid
- Results seem incomplete or inconsistent
- Explicit request to review before continuing
**Don't block for:**
- Word choice decisions
- Section ordering
- Which specific results to show (make a choice, flag it)
- Citation completeness (draft with what you find, note gaps)
---
## The Narrative Principle
**The single most critical insight**: Your paper is not a collection of experiments—it's a story with one clear contribution supported by evidence.
Every successful ML paper centers on what Neel Nanda calls "the narrative": a short, rigorous, evidence-based technical story with a takeaway readers care about.
**Three Pillars (must be crystal clear by end of introduction):**
| Pillar | Description | Example |
|--------|-------------|---------|
| **The What** | 1-3 specific novel claims within cohesive theme | "We prove that X achieves Y under condition Z" |
| **The Why** | Rigorous empirical evidence supporting claims | Strong baselines, experiments distinguishing hypotheses |
| **The So What** | Why readers should care | Connection to recognized community problems |
**If you cannot state your contribution in one sentence, you don't yet have a paper.**
---
## Paper Structure Workflow
### Workflow 1: Writing a Complete Paper (Iterative)
Copy this checklist and track progress. **Each step involves drafting → feedback → revision:**
```
Paper Writing Progress:
- [ ] Step 1: Define the one-sentence contribution (with scientist)
- [ ] Step 2: Draft Figure 1 → get feedback → revise
- [ ] Step 3: Draft abstract → get feedback → revise
- [ ] Step 4: Draft introduction → get feedback → revise
- [ ] Step 5: Draft methods → get feedback → revise
- [ ] Step 6: Draft experiments → get feedback → revise
- [ ] Step 7: Draft related work → get feedback → revise
- [ ] Step 8: Draft limitations → get feedback → revise
- [ ] Step 9: Complete paper checklist (required)
- [ ] Step 10: Final review cycle and submission
```
**Step 1: Define the One-Sentence Contribution**
**This step requires explicit confirmation from the scientist.**
Before writing anything, articulate and verify:
- What is the single thing your paper contributes?
- What was not obvious or present before your work?
> "I propose framing the contribution as: '[one sentence]'. Does this capture
> what you see as the main takeaway? Should we adjust the emphasis?"
**Step 2: Draft Figure 1**
Figure 1 deserves special attention—many readers skip directly to it.
- Convey core idea, approach, or most compelling result
- Use vector graphics (PDF/EPS for plots)
- Write captions that stand alone without main text
- Ensure readability in black-and-white (8% of men have color vision deficiency)
**Step 3: Write Abstract (5-Sentence Formula)**
From Sebastian Farquhar (DeepMind):
```
1. What you achieved: "We introduce...", "We prove...", "We demonstrate..."
2. Why this is hard and important
3. How you do it (with specialist keywords for discoverability)
4. What evidence you have
5. Your most remarkable number/result
```
**Delete** generic openings like "Large language models have achieved remarkable success..."
**Step 4: Write Introduction (1-1.5 pages max)**
Must include:
- 2-4 bullet contribution list (max 1-2 lines each in two-column format)
- Clear problem statement
- Brief approach overview
- Methods should start by page 2-3 maximum
**Step 5: Methods Section**
Enable reimplementation:
- Conceptual outline or pseudocode
- All hyperparameters listed
- Architectural details sufficient for reproduction
- Present final design decisions; ablations go in experiments
**Step 6: Experiments Section**
For each experiment, explicitly state:
- What claim it supports
- How it connects to main contribution
- Experimental setting (details in appendix)
- What to observe: "the blue line shows X, which demonstrates Y"
Requirements:
- Error bars with methodology (standard deviation vs standard error)
- Hyperparameter search ranges
- Compute infrastructure (GPU type, total hours)
- Seed-setting methods
**Step 7: Related Work**
Organize methodologically, not paper-by-paper:
**Good:** "One line of work uses Floogledoodle's assumption [refs] whereas we use Doobersnoddle's assumption because..."
**Bad:** "Snap et al. introduced X while Crackle et al. introduced Y."
Cite generously—reviewers likely authored relevant papers.
**Step 8: Limitations Section (REQUIRED)**
All major conferences require this. Counter-intuitively, honesty helps:
- Reviewers are instructed not to penalize honest limitation acknowledgment
- Pre-empt criticisms by identifying weaknesses first
- Explain why limitations don't undermine core claims
**Step 9: Paper Checklist**
NeurIPS, ICML, and ICLR all require paper checklists. See [references/checklists.md](references/checklists.md).
---
## Writing Philosophy for Top ML Conferences
**This section distills the most important writing principles from leading ML researchers.** These aren't optional style suggestions—they're what separates accepted papers from rejected ones.
> "A paper is a short, rigorous, evidence-based technical story with a takeaway readers care about." — Neel Nanda
### The Sources Behind This Guidance
This skill synthesizes writing philosophy from researchers who have published extensively at top venues:
| Source | Key Contribution | Link |
|--------|-----------------|------|
| **Neel Nanda** (Google DeepMind) | The Narrative Principle, What/Why/So What framework | [How to Write ML Papers](https://www.alignmentforum.org/posts/eJGptPbbFPZGLpjsp/highly-opinionated-advice-on-how-to-write-ml-papers) |
| **Sebastian Farquhar** (DeepMind) | 5-sentence abstract formula | [How to Write ML Papers](https://sebastianfarquhar.com/on-research/2024/11/04/how_to_write_ml_papers/) |
| **Gopen & Swan** | 7 principles of reader expectations | [Science of Scientific Writing](https://cseweb.ucsd.edu/~swanson/papers/science-of-writing.pdf) |
| **Zachary Lipton** | Word choice, eliminating hedging | [Heuristics for Scientific Writing](https://www.approximatelycorrect.com/2018/01/29/heuristics-technical-scientific-writing-machine-learning-perspective/) |
| **Jacob Steinhardt** (UC Berkeley) | Precision, consistent terminology | [Writing Tips](https://bounded-regret.ghost.io/) |
| **Ethan Perez** (Anthropic) | Micro-level clarity tips | [Easy Paper Writing Tips](https://ethanperez.net/easy-paper-writing-tips/) |
| **Andrej Karpathy** | Single contribution focus | Various lectures |
**For deeper dives into any of these, see:**
- [references/writing-guide.md](references/writing-guide.md) - Full explanations with examples
- [references/sources.md](references/sources.md) - Complete bibliography
### Time Allocation (From Neel Nanda)
Spend approximately **equal time** on each of:
1. The abstract
2. The introduction
3. The figures
4. Everything else combined
**Why?** Most reviewers form judgments before reaching your methods. Readers encounter your paper as: **title → abstract → introduction → figures → maybe the rest.**
### Writing Style Guidelines
#### Sentence-Level Clarity (Gopen & Swan's 7 Principles)
These principles are based on how readers actually process prose. Violating them forces readers to spend cognitive effort on structure rather than content.
| Principle | Rule | Example |
|-----------|------|---------|
| **Subject-verb proximity** | Keep subject and verb close | ❌ "The model, which was trained on..., achieves" → ✅ "The model achieves... after training on..." |
| **Stress position** | Place emphasis at sentence ends | ❌ "Accuracy improves by 15% when using attention" → ✅ "When using attention, accuracy improves by **15%**" |
| **Topic position** | Put context first, new info after | ✅ "Given these constraints, we propose..." |
| **Old before new** | Familiar info → unfamiliar info | Link backward, then introduce new |
| **One unit, one function** | Each paragraph makes one point | Split multi-point paragraphs |
| **Action in verb** | Use verbs, not nominalizations | ❌ "We performed an analysis" → ✅ "We analyzed" |
| **Context before new** | Set stage before presenting | Explain before showing equation |
**Full 7 principles with detailed examples:** See [references/writing-guide.md](references/writing-guide.md#the-7-principles-of-reader-expectations)
#### Micro-Level Tips (Ethan Perez)
These small changes accumulate into significantly clearer prose:
- **Minimize pronouns**: ❌ "This shows..." → ✅ "This result shows..."
- **Verbs early**: Position verbs near sentence start
- **Unfold apostrophes**: ❌ "X's Y" → ✅ "The Y of X" (when awkward)
- **Delete filler words**: "actually," "a bit," "very," "really," "basically," "quite," "essentially"
**Full micro-tips with examples:** See [references/writing-guide.md](references/writing-guide.md#micro-level-writing-tips)
#### Word Choice (Zachary Lipton)
- **Be specific**: ❌ "performance" → ✅ "accuracy" or "latency" (say what you mean)
- **Eliminate hedging**: Drop "may" and "can" unless genuinely uncertain
- **Avoid incremental vocabulary**: ❌ "combine," "modify," "expand" → ✅ "develop," "propose," "introduce"
- **Delete intensifiers**: ❌ "provides *very* tight approximation" → ✅ "provides tight approximation"
#### Precision Over Brevity (Jacob Steinhardt)
- **Consistent terminology**: Different terms for same concept creates confusion. Pick one and stick with it.
- **State assumptions formally**: Before theorems, list all assumptions explicitly
- **Intuition + rigor**: Provide intuitive explanations alongside formal proofs
### What Reviewers Actually Read
Understanding reviewer behavior helps prioritize your effort:
| Paper Section | % Reviewers Who Read | Implication |
|---------------|---------------------|-------------|
| Abstract | 100% | Must be perfect |
| Introduction | 90%+ (skimmed) | Front-load contribution |
| Figures | Examined before methods | Figure 1 is critical |
| Methods | Only if interested | Don't bury the lede |
| Appendix | Rarely | Put only supplementary details |
**Bottom line**: If your abstract and intro don't hook reviewers, they may never read your brilliant methods section.
---
## Conference Requirements Quick Reference
| Conference | Page Limit | Extra for Camera-Ready | Key Requirement |
|------------|------------|------------------------|-----------------|
| **NeurIPS 2025** | 9 pages | +0 | Mandatory checklist, lay summary for accepted |
| **ICML 2026** | 8 pages | +1 | Broader Impact Statement required |
| **ICLR 2026** | 9 pages | +1 | LLM disclosure required, reciprocal reviewing |
| **ACL 2025** | 8 pages (long) | varies | Limitations section mandatory |
| **AAAI 2026** | 7 pages | +1 | Strict style file adherence |
| **COLM 2025** | 9 pages | +1 | Focus on language models |
**Universal Requirements:**
- Double-blind review (anonymize submissions)
- References don't count toward page limit
- Appendices unlimited but reviewers not required to read
- LaTeX required for all venues
**LaTeX Templates:** See [templates/](templates/) directory for all conference templates.
---
## Using LaTeX Templates Properly
### Workflow 4: Starting a New Paper from Template
**Always copy the entire template directory first, then write within it.**
```
Template Setup Checklist:
- [ ] Step 1: Copy entire template directory to new project
- [ ] Step 2: Verify template compiles as-is (before any changes)
- [ ] Step 3: Read the template's example content to understand structure
- [ ] Step 4: Replace example content section by section
- [ ] Step 5: Keep template comments/examples as reference until done
- [ ] Step 6: Clean up template artifacts only at the end
```
**Step 1: Copy the Full Template**
```bash
# Create your paper directory with the complete template
cp -r templates/neurips2025/ ~/papers/my-new-paper/
cd ~/papers/my-new-paper/
# Verify structure is complete
ls -la
# Should see: main.tex, neurips.sty, Makefile, etc.
```
**⚠️ IMPORTANT**: Copy the ENTIRE directory, not just `main.tex`. Templates include:
- Style files (`.sty`) - required for compilation
- Bibliography styles (`.bst`) - required for references
- Example content - useful as reference
- Makefiles - for easy compilation
**Step 2: Verify Template Compiles First**
Before making ANY changes, compile the template as-is:
```bash
# Using latexmk (recommended)
latexmk -pdf main.tex
# Or manual compilation
pdflatex main.tex
bibtex main
pdflatex main.tex
pdflatex main.tex
```
If the unmodified template doesn't compile, fix that first. Common issues:
- Missing TeX packages → install via `tlmgr install <package>`
- Wrong TeX distribution → use TeX Live (recommended)
**Step 3: Keep Template Content as Reference**
Don't immediately delete all example content. Instead:
```latex
% KEEP template examples commented out as you write
% This shows you the expected format
% Template example (keep for reference):
% \begin{figure}[t]
% \centering
% \includegraphics[width=0.8\linewidth]{example-image}
% \caption{Template shows caption style}
% \end{figure}
% Your actual figure:
\begin{figure}[t]
\centering
\includegraphics[width=0.8\linewidth]{your-figure.pdf}
\caption{Your caption following the same style.}
\end{figure}
```
**Step 4: Replace Content Section by Section**
Work through the paper systematically:
```
Replacement Order:
1. Title and authors (anonymize for submission)
2. Abstract
3. Introduction
4. Methods
5. Experiments
6. Related Work
7. Conclusion
8. References (your .bib file)
9. Appendix
```
For each section:
1. Read the template's example content
2. Note any special formatting or macros used
3. Replace with your content following the same patterns
4. Compile frequently to catch errors early
**Step 5: Use Template Macros**
Templates often define useful macros. Check the preamble for:
```latex
% Common template macros to use:
\newcommand{\method}{YourMethodName} % Consistent method naming
\newcommand{\eg}{e.g.,\xspace} % Proper abbreviations
\newcommand{\ie}{i.e.,\xspace}
\newcommand{\etal}{\textit{et al.}\xspace}
```
**Step 6: Clean Up Only at the End**
Only remove template artifacts when paper is nearly complete:
```latex
% BEFORE SUBMISSION - remove these:
% - Commented-out template examples
% - Unused packages
% - Template's example figures/tables
% - Lorem ipsum or placeholder text
% KEEP these:
% - All style files (.sty)
% - Bibliography style (.bst)
% - Required packages from template
% - Any custom macros you're using
```
### Template Pitfalls to Avoid
| Pitfall | Problem | Solution |
|---------|---------|----------|
| Copying only `main.tex` | Missing `.sty`, won't compile | Copy entire directory |
| Modifying `.sty` files | Breaks conference formatting | Never edit style files |
| Adding random packages | Conflicts, breaks template | Only add if necessary |
| Deleting template content too early | Lose formatting reference | Keep as comments until done |
| Not compiling frequently | Errors accumulate | Compile after each section |
### Quick Template Reference
| Conference | Main File | Key Style File | Notes |
|------------|-----------|----------------|-------|
| NeurIPS 2025 | `main.tex` | `neurips.sty` | Has Makefile |
| ICML 2026 | `example_paper.tex` | `icml2026.sty` | Includes algorithm packages |
| ICLR 2026 | `iclr2026_conference.tex` | `iclr2026_conference.sty` | Has math_commands.tex |
| ACL | `acl_latex.tex` | `acl.sty` | Strict formatting |
| AAAI 2026 | `aaai2026-unified-template.tex` | `aaai2026.sty` | Very strict compliance |
| COLM 2025 | `colm2025_conference.tex` | `colm2025_conference.sty` | Similar to ICLR |
---
## Conference Resubmission & Format Conversion
When a paper is rejected or withdrawn from one venue and resubmitted to another, format conversion is required. This is a common workflow in ML research.
### Workflow 3: Converting Between Conference Formats
```
Format Conversion Checklist:
- [ ] Step 1: Identify source and target template differences
- [ ] Step 2: Create new project with target template
- [ ] Step 3: Copy content sections (not preamble)
- [ ] Step 4: Adjust page limits and content
- [ ] Step 5: Update conference-specific requirements
- [ ] Step 6: Verify compilation and formatting
```
**Step 1: Key Template Differences**
| From → To | Page Change | Key Adjustments |
|-----------|-------------|-----------------|
| NeurIPS → ICML | 9 → 8 pages | Cut 1 page, add Broader Impact if missing |
| ICML → ICLR | 8 → 9 pages | Can expand experiments, add LLM disclosure |
| NeurIPS → ACL | 9 → 8 pages | Restructure for NLP conventions, add Limitations |
| ICLR → AAAI | 9 → 7 pages | Significant cuts needed, strict style adherence |
| Any → COLM | varies → 9 | Reframe for language model focus |
**Step 2: Content Migration (NOT Template Merge)**
**Never copy LaTeX preambles between templates.** Instead:
```bash
# 1. Start fresh with target template
cp -r templates/icml2026/ new_submission/
# 2. Copy ONLY content sections from old paper
# - Abstract text
# - Section content (between \section{} commands)
# - Figures and tables
# - Bibliography entries
# 3. Paste into target template structure
```
**Step 3: Adjusting for Page Limits**
When cutting pages (e.g., NeurIPS 9 → AAAI 7):
- Move detailed proofs to appendix
- Condense related work (cite surveys instead of individual papers)
- Combine similar experiments into unified tables
- Use smaller figure sizes with subfigures
- Tighten writing: eliminate redundancy, use active voice
When expanding (e.g., ICML 8 → ICLR 9):
- Add ablation studies reviewers requested
- Expand limitations discussion
- Include additional baselines
- Add qualitative examples
**Step 4: Conference-Specific Adjustments**
| Target Venue | Required Additions |
|--------------|-------------------|
| **ICML** | Broader Impact Statement (after conclusion) |
| **ICLR** | LLM usage disclosure, reciprocal reviewing agreement |
| **ACL/EMNLP** | Limitations section (mandatory), Ethics Statement |
| **AAAI** | Strict adherence to style file (no modifications) |
| **NeurIPS** | Paper checklist (appendix), lay summary if accepted |
**Step 5: Update References**
```latex
% Remove self-citations that reveal identity (for blind review)
% Update any "under review" citations to published versions
% Add new relevant work published since last submission
```
**Step 6: Addressing Previous Reviews**
When resubmitting after rejection:
- **Do** address reviewer concerns in the new version
- **Do** add experiments/clarifications reviewers requested
- **Don't** include a "changes from previous submission" section (blind review)
- **Don't** reference the previous submission or reviews
**Common Conversion Pitfalls:**
- ❌ Copying `\usepackage` commands (causes conflicts)
- ❌ Keeping old conference header/footer commands
- ❌ Forgetting to update `\bibliography{}` path
- ❌ Missing conference-specific required sections
- ❌ Exceeding page limit after format change
---
## Citation Workflow (Hallucination Prevention)
**⚠️ CRITICAL**: AI-generated citations have ~40% error rate. **Never write BibTeX from memory.**
### The Golden Rule
```
IF you cannot programmatically fetch a citation:
→ Mark it as [CITATION NEEDED] or [PLACEHOLDER - VERIFY]
→ Tell the scientist explicitly
→ NEVER invent a plausible-sounding reference
```
### Workflow 2: Adding Citations
```
Citation Verification (MANDATORY for every citation):
- [ ] Step 1: Search using Exa MCP or Semantic Scholar API
- [ ] Step 2: Verify paper exists in 2+ sources (Semantic Scholar + arXiv/CrossRef)
- [ ] Step 3: Retrieve BibTeX via DOI (programmatically, not from memory)
- [ ] Step 4: Verify the claim you're citing actually appears in the paper
- [ ] Step 5: Add verified BibTeX to bibliography
- [ ] Step 6: If ANY step fails → mark as placeholder, inform scientist
```
**Step 0: Use Exa MCP for Initial Search (Recommended)**
If Exa MCP is installed, use it to find relevant papers:
```
Search: "RLHF language model alignment 2023"
Search: "sparse autoencoders interpretability"
Search: "attention mechanism transformers Vaswani"
```
Then verify each result with Semantic Scholar and fetch BibTeX via DOI.
**Step 1: Search Semantic Scholar**
```python
from semanticscholar import SemanticScholar

sch = SemanticScholar()
# Search by keyword; limit keeps the example output short.
results = sch.search_paper("attention mechanism transformers", limit=5)
for paper in results:
    print(f"{paper.title} - {paper.paperId}")
    # externalIds can be None/missing on some records — guard before .get()
    external_ids = paper.externalIds or {}
    print(f" DOI: {external_ids.get('DOI', 'N/A')}")
```
**Step 2: Verify Existence**
Confirm paper appears in at least two sources (Semantic Scholar + CrossRef/arXiv).
**Step 3: Retrieve BibTeX via DOI**
```python
import requests

def doi_to_bibtex(doi: str, timeout: float = 30.0) -> str:
    """Fetch a verified BibTeX entry for *doi* via DOI content negotiation.

    Sends an HTTP GET to ``https://doi.org/<doi>`` with an
    ``Accept: application/x-bibtex`` header; the DOI resolver redirects to
    the registration agency (e.g. CrossRef), which returns the BibTeX record.

    Args:
        doi: The DOI to resolve, e.g. ``"10.48550/arXiv.1706.03762"``.
        timeout: Seconds to wait for a response before giving up.

    Returns:
        The BibTeX record as text.

    Raises:
        requests.HTTPError: If the DOI does not resolve (e.g. HTTP 404).
        requests.Timeout: If the server does not respond within *timeout*.
    """
    response = requests.get(
        f"https://doi.org/{doi}",
        headers={"Accept": "application/x-bibtex"},
        # requests has no default timeout; without one, a dead resolver
        # would hang this call indefinitely.
        timeout=timeout,
    )
    response.raise_for_status()
    return response.text

# Example
bibtex = doi_to_bibtex("10.48550/arXiv.1706.03762")
print(bibtex)
```
**Step 4: Verify Claims**
Before citing for a specific claim, access the paper and confirm the attributed claim actually appears.
**Step 5: Handle Failures Explicitly**
If you cannot verify a citation at ANY step:
```latex
% Option 1: Explicit placeholder
\cite{PLACEHOLDER_smith2023_verify} % TODO: Could not verify - scientist must confirm
% Option 2: Note in text
... as shown in prior work [CITATION NEEDED - could not verify Smith et al. 2023].
```
**Always inform the scientist:**
> "I could not verify the following citations and have marked them as placeholders:
> - Smith et al. 2023 on reward hacking - could not find in Semantic Scholar
> - Jones 2022 on scaling laws - found similar paper but different authors
> Please verify these before submission."
### Summary: Citation Rules
| Situation | Action |
|-----------|--------|
| Found paper, got DOI, fetched BibTeX | ✅ Use the citation |
| Found paper, no DOI | ✅ Use arXiv BibTeX or manual entry from paper |
| Paper exists but can't fetch BibTeX | ⚠️ Mark placeholder, inform scientist |
| Uncertain if paper exists | ❌ Mark `[CITATION NEEDED]`, inform scientist |
| "I think there's a paper about X" | ❌ **NEVER cite** - search first or mark placeholder |
**🚨 NEVER generate BibTeX from memory—always fetch programmatically. 🚨**
See [references/citation-workflow.md](references/citation-workflow.md) for complete API documentation.
---
## Common Issues and Solutions
**Issue: Abstract too generic**
Delete first sentence if it could be prepended to any ML paper. Start with your specific contribution.
**Issue: Introduction exceeds 1.5 pages**
Split background into Related Work. Front-load contribution bullets. Methods should start by page 2-3.
**Issue: Experiments lack explicit claims**
Add sentence before each experiment: "This experiment tests whether [specific claim]..."
**Issue: Reviewers find paper hard to follow**
- Add explicit signposting: "In this section, we show X"
- Use consistent terminology throughout
- Include figure captions that stand alone
**Issue: Missing statistical significance**
Always include:
- Error bars (specify: std dev or std error)
- Number of runs
- Statistical tests if comparing methods
---
## Reviewer Evaluation Criteria
Reviewers assess papers on four dimensions:
| Criterion | What Reviewers Look For |
|-----------|------------------------|
| **Quality** | Technical soundness, well-supported claims |
| **Clarity** | Clear writing, reproducible by experts |
| **Significance** | Community impact, advances understanding |
| **Originality** | New insights (doesn't require new method) |
**Scoring (NeurIPS 6-point scale):**
- 6: Strong Accept - Groundbreaking, flawless
- 5: Accept - Technically solid, high impact
- 4: Borderline Accept - Solid, limited evaluation
- 3: Borderline Reject - Solid but weaknesses outweigh
- 2: Reject - Technical flaws
- 1: Strong Reject - Known results or ethics issues
See [references/reviewer-guidelines.md](references/reviewer-guidelines.md) for detailed reviewer instructions.
---
## Tables and Figures
### Tables
Use `booktabs` LaTeX package for professional tables:
```latex
\usepackage{booktabs}
\begin{tabular}{lcc}
\toprule
Method & Accuracy ↑ & Latency ↓ \\
\midrule
Baseline & 85.2 & 45ms \\
\textbf{Ours} & \textbf{92.1} & 38ms \\
\bottomrule
\end{tabular}
```
**Rules:**
- Bold best value per metric
- Include direction symbols (↑ higher is better, ↓ lower is better)
- Right-align numerical columns
- Consistent decimal precision
### Figures
- **Vector graphics** (PDF, EPS) for all plots and diagrams
- **Raster** (PNG 600 DPI) only for photographs
- Use **colorblind-safe palettes** (Okabe-Ito or Paul Tol)
- Verify **grayscale readability** (8% of men have color vision deficiency)
- **No title inside figure**—the caption serves this function
- **Self-contained captions**—reader should understand without main text
---
## References & Resources
### Reference Documents (Deep Dives)
| Document | Contents |
|----------|----------|
| [writing-guide.md](references/writing-guide.md) | Gopen & Swan 7 principles, Ethan Perez micro-tips, word choice |
| [citation-workflow.md](references/citation-workflow.md) | Citation APIs, Python code, BibTeX management |
| [checklists.md](references/checklists.md) | NeurIPS 16-item, ICML, ICLR, ACL requirements |
| [reviewer-guidelines.md](references/reviewer-guidelines.md) | Evaluation criteria, scoring, rebuttals |
| [sources.md](references/sources.md) | Complete bibliography of all sources |
### LaTeX Templates
Templates in `templates/` directory: **ICML 2026**, **ICLR 2026**, **NeurIPS 2025**, **ACL/EMNLP**, **AAAI 2026**, **COLM 2025**.
**Compiling to PDF:**
- **VS Code/Cursor**: Install LaTeX Workshop extension + TeX Live → Save to auto-compile
- **Command line**: `latexmk -pdf main.tex` or `pdflatex` + `bibtex` workflow
- **Online**: Upload to [Overleaf](https://overleaf.com)
See [templates/README.md](templates/README.md) for detailed setup instructions.
### Key External Sources
**Writing Philosophy:**
- [Neel Nanda: How to Write ML Papers](https://www.alignmentforum.org/posts/eJGptPbbFPZGLpjsp/highly-opinionated-advice-on-how-to-write-ml-papers) - Narrative, "What/Why/So What"
- [Farquhar: How to Write ML Papers](https://sebastianfarquhar.com/on-research/2024/11/04/how_to_write_ml_papers/) - 5-sentence abstract
- [Gopen & Swan: Science of Scientific Writing](https://cseweb.ucsd.edu/~swanson/papers/science-of-writing.pdf) - 7 reader expectation principles
- [Lipton: Heuristics for Scientific Writing](https://www.approximatelycorrect.com/2018/01/29/heuristics-technical-scientific-writing-machine-learning-perspective/) - Word choice
- [Perez: Easy Paper Writing Tips](https://ethanperez.net/easy-paper-writing-tips/) - Micro-level clarity
**APIs:** [Semantic Scholar](https://api.semanticscholar.org/api-docs/) | [CrossRef](https://www.crossref.org/documentation/retrieve-metadata/rest-api/) | [arXiv](https://info.arxiv.org/help/api/basics.html)
**Venues:** [NeurIPS](https://neurips.cc/Conferences/2025/PaperInformation/StyleFiles) | [ICML](https://icml.cc/Conferences/2025/AuthorInstructions) | [ICLR](https://iclr.cc/Conferences/2026/AuthorGuide) | [ACL](https://github.com/acl-org/acl-style-files)

View File

@@ -10,8 +10,6 @@ This reference documents the mandatory checklist requirements for major ML/AI co
- [ICML Paper Checklist](#icml-paper-checklist)
- [ICLR Requirements](#iclr-requirements)
- [ACL Requirements](#acl-requirements)
- [AAAI Requirements](#aaai-requirements)
- [COLM Requirements](#colm-requirements)
- [Universal Pre-Submission Checklist](#universal-pre-submission-checklist)
---
@@ -282,77 +280,6 @@ If applicable:
---
## AAAI Requirements
### Formatting (Strictest of All Venues)
AAAI enforces formatting rules more strictly than any other major venue. Papers that deviate from the template are desk-rejected.
- [ ] Use the **exact** AAAI style file without modification — no `\setlength`, no `\vspace` hacks, no font overrides
- [ ] 7 pages main content (8 for camera-ready with author info)
- [ ] Two-column format, Times font (set by template)
- [ ] References and appendices do not count toward page limit
- [ ] Abstract must be a single paragraph
- [ ] Do not modify margins, column widths, or font sizes
### Required Sections
- [ ] Abstract (single paragraph, no math or citations)
- [ ] Introduction with clear contribution statement
- [ ] References in AAAI format (uses `aaai2026.bst`)
- [ ] Appendix (optional, unlimited)
### Ethics and Reproducibility
- [ ] Broader impact statement (encouraged but not always mandatory — check current year's CFP)
- [ ] Reproducibility details (datasets, code availability)
- [ ] Acknowledge use of AI writing tools if applicable
### Key Differences from Other Venues
- **No separate limitations section required** (unlike ACL), but discussing limitations is recommended
- **Strictest formatting enforcement** — the style checker will reject non-compliant PDFs
- **No paper checklist** like NeurIPS has, but the universal checklist below still applies
- **Unified template** covers main paper and supplementary in the same file
---
## COLM Requirements
### Overview
COLM (Conference on Language Modeling) focuses specifically on language model research. Framing must target this community.
### Formatting
- [ ] 9 pages main content (10 for camera-ready)
- [ ] Use COLM template (based on ICLR template with modifications)
- [ ] Double-blind review
- [ ] References and appendices unlimited
### Required Sections
- [ ] Abstract
- [ ] Introduction framed for language modeling community
- [ ] Conclusion
- [ ] References
### Content Expectations
- [ ] Contribution must be relevant to language models (broadly interpreted: training, evaluation, applications, theory, alignment, safety)
- [ ] If the method is general, frame with language model examples
- [ ] Baselines should include recent LM-specific methods where applicable
### Key Differences from Other Venues
- **Narrower scope** than NeurIPS/ICML — must frame for LM community
- **Template derived from ICLR** — similar formatting rules
- **Newer venue** — reviewer norms are still establishing; err on the side of thorough evaluation
- **No mandatory checklist** like NeurIPS, but broader impact discussion is expected
- **LLM disclosure**: If LLMs were used in research (code generation, data annotation, writing assistance), disclose this
---
## Universal Pre-Submission Checklist
### Before Every Submission

View File

@@ -289,7 +289,7 @@ class CitationManager:
)
if resp.status_code == 200:
sources.append("CrossRef")
except Exception:
except:
pass
# Check arXiv if ID available
@@ -301,7 +301,7 @@ class CitationManager:
)
if "<entry>" in resp.text and "<title>" in resp.text:
sources.append("arXiv")
except Exception:
except:
pass
return len(sources) >= 2, sources
@@ -318,7 +318,7 @@ class CitationManager:
)
if resp.status_code == 200:
return resp.text
except Exception:
except:
pass
# Fallback: generate from paper data
@@ -419,7 +419,7 @@ def batch_cite(queries: List[str], output_file: str = "references.bib"):
| Customization | Limited | Highly flexible |
| Backend | bibtex | Biber (recommended) |
**Recommendation**: Use natbib with BibTeX for conference submissions — all major venue templates (NeurIPS, ICML, ICLR, ACL, AAAI, COLM) ship with natbib and `.bst` files. BibLaTeX with Biber is an option for journals or personal projects where you control the template.
**Recommendation**: Use BibLaTeX with Biber for new papers.
### LaTeX Setup

View File

@@ -105,7 +105,7 @@ Reviewers are explicitly instructed to:
- Penalizing authors for honest limitation acknowledgment
- Rejecting for missing citations to reviewer's own work
### Timeline (NeurIPS 2025 — verify dates for current year)
### Timeline (NeurIPS 2025)
- Bidding: May 17-21
- Reviewing period: May 29 - July 2
@@ -113,8 +113,6 @@ Reviewers are explicitly instructed to:
- Discussion period: July 31 - August 13
- Final notifications: September 18
> **Note**: These dates are from the 2025 cycle. Always check the current year's call for papers at the venue website.
---
## ICML Reviewer Guidelines
@@ -200,70 +198,6 @@ ACL has a dedicated ethics review process for:
---
## AAAI Reviewer Guidelines
### Evaluation Criteria
AAAI reviewers evaluate along similar axes to NeurIPS/ICML but with some differences:
| Criterion | Weight | Notes |
|-----------|--------|-------|
| **Technical quality** | High | Soundness of approach, correctness of results |
| **Significance** | High | Importance of the problem and contribution |
| **Novelty** | Medium-High | New ideas, methods, or insights |
| **Clarity** | Medium | Clear writing, well-organized presentation |
| **Reproducibility** | Medium | Sufficient detail to reproduce results |
### AAAI-Specific Considerations
- **Broader AI scope**: AAAI covers all of AI, not just ML. Papers on planning, reasoning, knowledge representation, NLP, vision, robotics, and multi-agent systems are all in scope. Reviewers may not be deep ML specialists.
- **Formatting strictness**: AAAI reviewers are instructed to flag formatting violations. Non-compliant papers may be desk-rejected before review.
- **Application papers**: AAAI is more receptive to application-focused work than NeurIPS/ICML. Framing a strong application contribution is viable.
- **Senior Program Committee**: AAAI uses SPCs (Senior Program Committee members) who mediate between reviewers and make accept/reject recommendations.
### Scoring (AAAI Scale)
- **Strong Accept**: Clearly above threshold, excellent contribution
- **Accept**: Above threshold, good contribution with minor issues
- **Weak Accept**: Borderline, merits outweigh concerns
- **Weak Reject**: Borderline, concerns outweigh merits
- **Reject**: Below threshold, significant issues
- **Strong Reject**: Well below threshold
---
## COLM Reviewer Guidelines
### Evaluation Criteria
COLM reviews focus on relevance to language modeling in addition to standard criteria:
| Criterion | Weight | Notes |
|-----------|--------|-------|
| **Relevance** | High | Must be relevant to language modeling community |
| **Technical quality** | High | Sound methodology, well-supported claims |
| **Novelty** | Medium-High | New insights about language models |
| **Clarity** | Medium | Clear presentation, reproducible |
| **Significance** | Medium-High | Impact on LM research and practice |
### COLM-Specific Considerations
- **Language model focus**: Reviewers will assess whether the contribution advances understanding of language models. General ML contributions need explicit LM framing.
- **Newer venue norms**: COLM is newer than NeurIPS/ICML, so reviewer calibration varies more. Write more defensively — anticipate a wider range of reviewer expertise.
- **ICLR-derived process**: Review process is modeled on ICLR (open reviews, author response period, discussion among reviewers).
- **Broad interpretation of "language modeling"**: Includes training, evaluation, alignment, safety, efficiency, applications, theory, multimodality (if language is central), and social impact of LMs.
### Scoring
COLM uses an ICLR-style scoring system:
- **8-10**: Strong accept (top papers)
- **6-7**: Weak accept (solid contribution)
- **5**: Borderline
- **3-4**: Weak reject (below threshold)
- **1-2**: Strong reject
---
## What Makes Reviews Strong
### Following Daniel Dennett's Rules

View File

@@ -225,6 +225,8 @@ Provide context before asking the reader to consider anything new. This applies
---
---
## Micro-Level Writing Tips
### From Ethan Perez (Anthropic)

Some files were not shown because too many files have changed in this diff Show More