gateway/session.py

"""
Session management for the gateway.

Handles:
- Session context tracking (where messages come from)
- Session storage (conversations persisted to disk)
- Reset policy evaluation (when to start fresh)
- Dynamic system prompt injection (agent knows its context)
"""

import hashlib
import logging
import os
import json
import threading
import uuid
from pathlib import Path
from datetime import datetime, timedelta
from dataclasses import dataclass
from typing import Dict, List, Optional, Any

logger = logging.getLogger(__name__)


def _now() -> datetime:
    """Return the current local time."""
    return datetime.now()


# ---------------------------------------------------------------------------
# PII redaction helpers
# ---------------------------------------------------------------------------

def _hash_id(value: str) -> str:
    """Deterministic 12-char hex hash of an identifier."""
    return hashlib.sha256(value.encode("utf-8")).hexdigest()[:12]


def _hash_sender_id(value: str) -> str:
    """Hash a sender ID to ``user_<12hex>``."""
    return f"user_{_hash_id(value)}"


def _hash_chat_id(value: str) -> str:
    """Hash the numeric portion of a chat ID, preserving platform prefix.

    ``telegram:12345`` → ``telegram:<hash>``
    ``12345``          → ``<hash>``
    """
    colon = value.find(":")
    if colon > 0:
        prefix = value[:colon]
        return f"{prefix}:{_hash_id(value[colon + 1:])}"
    return _hash_id(value)


from .config import (
    Platform,
    GatewayConfig,
    SessionResetPolicy,  # noqa: F401 — re-exported via gateway/__init__.py
    HomeChannel,
)
from .whatsapp_identity import (
    canonical_whatsapp_identifier,
    normalize_whatsapp_identifier,
)


@dataclass
class SessionSource:
    """
    Describes where a message originated from.
    
    This information is used to:
    1. Route responses back to the right place
    2. Inject context into the system prompt
    3. Track origin for cron job delivery
    """
    platform: Platform
    chat_id: str
    chat_name: Optional[str] = None
    chat_type: str = "dm"  # "dm", "group", "channel", "thread"
    user_id: Optional[str] = None
    user_name: Optional[str] = None
    thread_id: Optional[str] = None  # For forum topics, Discord threads, etc.
    chat_topic: Optional[str] = None  # Channel topic/description (Discord, Slack)
    user_id_alt: Optional[str] = None  # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
    chat_id_alt: Optional[str] = None  # Signal group internal ID
    is_bot: bool = False  # True when the message author is a bot/webhook (Discord)
    guild_id: Optional[str] = None  # Discord guild / Slack workspace / Matrix server scope
    parent_chat_id: Optional[str] = None  # Parent channel when chat_id refers to a thread
    message_id: Optional[str] = None  # ID of the triggering message (for pin/reply/react)
    
    @property
    def description(self) -> str:
        """Human-readable description of the source."""
        if self.platform == Platform.LOCAL:
            return "CLI terminal"
        
        parts = []
        if self.chat_type == "dm":
            parts.append(f"DM with {self.user_name or self.user_id or 'user'}")
        elif self.chat_type == "group":
            parts.append(f"group: {self.chat_name or self.chat_id}")
        elif self.chat_type == "channel":
            parts.append(f"channel: {self.chat_name or self.chat_id}")
        else:
            parts.append(self.chat_name or self.chat_id)
        
        if self.thread_id:
            parts.append(f"thread: {self.thread_id}")
        
        return ", ".join(parts)
    
    def to_dict(self) -> Dict[str, Any]:
        d = {
            "platform": self.platform.value,
            "chat_id": self.chat_id,
            "chat_name": self.chat_name,
            "chat_type": self.chat_type,
            "user_id": self.user_id,
            "user_name": self.user_name,
            "thread_id": self.thread_id,
            "chat_topic": self.chat_topic,
        }
        if self.user_id_alt:
            d["user_id_alt"] = self.user_id_alt
        if self.chat_id_alt:
            d["chat_id_alt"] = self.chat_id_alt
        if self.guild_id:
            d["guild_id"] = self.guild_id
        if self.parent_chat_id:
            d["parent_chat_id"] = self.parent_chat_id
        if self.message_id:
            d["message_id"] = self.message_id
        return d

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SessionSource":
        return cls(
            platform=Platform(data["platform"]),
            chat_id=str(data["chat_id"]),
            chat_name=data.get("chat_name"),
            chat_type=data.get("chat_type", "dm"),
            user_id=data.get("user_id"),
            user_name=data.get("user_name"),
            thread_id=data.get("thread_id"),
            chat_topic=data.get("chat_topic"),
            user_id_alt=data.get("user_id_alt"),
            chat_id_alt=data.get("chat_id_alt"),
            guild_id=data.get("guild_id"),
            parent_chat_id=data.get("parent_chat_id"),
            message_id=data.get("message_id"),
        )
    

@dataclass
class SessionContext:
    """
    Full context for a session, used for dynamic system prompt injection.
    
    The agent receives this information to understand:
    - Where messages are coming from
    - What platforms are available
    - Where it can deliver scheduled task outputs
    """
    source: SessionSource
    connected_platforms: List[Platform]
    home_channels: Dict[Platform, HomeChannel]
    shared_multi_user_session: bool = False
    
    # Session metadata
    session_key: str = ""
    session_id: str = ""
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    
    def to_dict(self) -> Dict[str, Any]:
        return {
            "source": self.source.to_dict(),
            "connected_platforms": [p.value for p in self.connected_platforms],
            "home_channels": {
                p.value: hc.to_dict() for p, hc in self.home_channels.items()
            },
            "shared_multi_user_session": self.shared_multi_user_session,
            "session_key": self.session_key,
            "session_id": self.session_id,
            "created_at": self.created_at.isoformat() if self.created_at else None,
            "updated_at": self.updated_at.isoformat() if self.updated_at else None,
        }


_PII_SAFE_PLATFORMS = frozenset({
    Platform.WHATSAPP,
    Platform.SIGNAL,
    Platform.TELEGRAM,
    Platform.BLUEBUBBLES,
})
"""Platforms where user IDs can be safely redacted (no in-message mention system
that requires raw IDs).  Discord is excluded because mentions use ``<@user_id>``
and the LLM needs the real ID to tag users."""


def _discord_tools_loaded() -> bool:
    """True iff the agent will actually have Discord tools this session.

    Two conditions must hold:
      1. The `discord` or `discord_admin` toolset is enabled for the
         Discord platform via `hermes tools` (opt-in, default OFF).
      2. `DISCORD_BOT_TOKEN` is set — the tool's `check_fn` gates on it
         at registry time, so the toolset being enabled in config is not
         enough if the token isn't configured.

    Returns False (safe default — keeps the stale-API disclaimer) on any
    error so a bad config can't silently promise tools the agent lacks.
    """
    if not (os.environ.get("DISCORD_BOT_TOKEN") or "").strip():
        return False
    try:
        from hermes_cli.config import load_config
        from hermes_cli.tools_config import _get_platform_tools
        cfg = load_config()
        enabled = _get_platform_tools(cfg, "discord", include_default_mcp_servers=False)
        return "discord" in enabled or "discord_admin" in enabled
    except Exception:
        return False


def build_session_context_prompt(
    context: SessionContext,
    *,
    redact_pii: bool = False,
) -> str:
    """
    Build the dynamic system prompt section that tells the agent about its context.
    
    This is injected into the system prompt so the agent knows:
    - Where messages are coming from
    - What platforms are connected
    - Where it can deliver scheduled task outputs

    When *redact_pii* is True **and** the source platform is in
    ``_PII_SAFE_PLATFORMS``, phone numbers are stripped and user/chat IDs
    are replaced with deterministic hashes before being sent to the LLM.
    Platforms like Discord are excluded because mentions need real IDs.
    Routing still uses the original values (they stay in SessionSource).
    """
    # Only apply redaction on platforms where IDs aren't needed for mentions
    redact_pii = redact_pii and context.source.platform in _PII_SAFE_PLATFORMS
    lines = [
        "## Current Session Context",
        "",
    ]
    
    # Source info
    platform_name = context.source.platform.value.title()
    if context.source.platform == Platform.LOCAL:
        lines.append(f"**Source:** {platform_name} (the machine running this agent)")
    else:
        # Build a description that respects PII redaction
        src = context.source
        if redact_pii:
            # Build a safe description without raw IDs
            _uname = src.user_name or (
                _hash_sender_id(src.user_id) if src.user_id else "user"
            )
            _cname = src.chat_name or _hash_chat_id(src.chat_id)
            if src.chat_type == "dm":
                desc = f"DM with {_uname}"
            elif src.chat_type == "group":
                desc = f"group: {_cname}"
            elif src.chat_type == "channel":
                desc = f"channel: {_cname}"
            else:
                desc = _cname
        else:
            desc = src.description
        lines.append(f"**Source:** {platform_name} ({desc})")
    
    # Channel topic (if available - provides context about the channel's purpose)
    if context.source.chat_topic:
        lines.append(f"**Channel Topic:** {context.source.chat_topic}")

    # User identity.
    # In shared multi-user sessions (shared threads OR shared non-thread groups
    # when group_sessions_per_user=False), multiple users contribute to the same
    # conversation.  Don't pin a single user name in the system prompt — it
    # changes per-turn and would bust the prompt cache.  Instead, note that
    # this is a multi-user session; individual sender names are prefixed on
    # each user message by the gateway.
    if context.shared_multi_user_session:
        session_label = "Multi-user thread" if context.source.thread_id else "Multi-user session"
        lines.append(
            f"**Session type:** {session_label} — messages are prefixed "
            "with [sender name]. Multiple users may participate."
        )
    elif context.source.user_name:
        lines.append(f"**User:** {context.source.user_name}")
    elif context.source.user_id:
        uid = context.source.user_id
        if redact_pii:
            uid = _hash_sender_id(uid)
        lines.append(f"**User ID:** {uid}")
    
    # Platform-specific behavioral notes
    if context.source.platform == Platform.SLACK:
        lines.append("")
        lines.append(
            "**Platform notes:** You are running inside Slack. "
            "You do NOT have access to Slack-specific APIs — you cannot search "
            "channel history, pin/unpin messages, manage channels, or list users. "
            "Do not promise to perform these actions. The gateway may inline the "
            "current message's Slack block/attachment payload when available, but "
            "you still cannot call Slack APIs yourself."
        )
    elif context.source.platform == Platform.DISCORD:
        # Inject the Discord IDs block only when the agent actually has
        # Discord tools loaded this session — i.e. the user opted into
        # `discord` / `discord_admin` via `hermes tools` AND the bot
        # token is configured.  Otherwise keep the stale-API disclaimer
        # honest so we never promise tools the agent lacks.
        if _discord_tools_loaded():
            src = context.source
            id_lines = ["", "**Discord IDs (for the `discord` / `discord_admin` tools):**"]
            if src.guild_id:
                id_lines.append(f"  - Guild: `{src.guild_id}`")
            if src.thread_id and src.parent_chat_id:
                id_lines.append(f"  - Parent channel: `{src.parent_chat_id}`")
                id_lines.append(f"  - Thread: `{src.thread_id}` (use as `channel_id` for fetch_messages etc.)")
            else:
                id_lines.append(f"  - Channel: `{src.chat_id}`")
            if src.message_id:
                id_lines.append(f"  - Triggering message: `{src.message_id}`")
            lines.extend(id_lines)
        else:
            lines.append("")
            lines.append(
                "**Platform notes:** You are running inside Discord. "
                "You do NOT have access to Discord-specific APIs — you cannot search "
                "channel history, pin messages, manage roles, or list server members. "
                "Do not promise to perform these actions. If the user asks, explain "
                "that you can only read messages sent directly to you and respond."
            )
    elif context.source.platform == Platform.BLUEBUBBLES:
        lines.append("")
        lines.append(
            "**Platform notes:** You are responding via iMessage. "
            "Keep responses short and conversational — think texts, not essays. "
            "Structure longer replies as separate short thoughts, each separated "
            "by a blank line (double newline). Each block between blank lines "
            "will be delivered as its own iMessage bubble, so write accordingly: "
            "one idea per bubble, 1–3 sentences each. "
            "If the user needs a detailed answer, give the short version first "
            "and offer to elaborate."
        )
    elif context.source.platform == Platform.YUANBAO:
        lines.append("")
        lines.append(
            "**Platform notes:** You are running inside Yuanbao. "
            "You CAN send private (DM) messages via the send_message tool. "
            "Use target='yuanbao:direct:<account_id>' for DM "
            "and target='yuanbao:group:<group_code>' for group chat."
        )

    # Connected platforms
    platforms_list = ["local (files on this machine)"]
    for p in context.connected_platforms:
        if p != Platform.LOCAL:
            platforms_list.append(f"{p.value}: Connected ✓")
    
    lines.append(f"**Connected Platforms:** {', '.join(platforms_list)}")
    
    # Home channels
    if context.home_channels:
        lines.append("")
        lines.append("**Home Channels (default destinations):**")
        for platform, home in context.home_channels.items():
            hc_id = _hash_chat_id(home.chat_id) if redact_pii else home.chat_id
            lines.append(f"  - {platform.value}: {home.name} (ID: {hc_id})")
    
    # Delivery options for scheduled tasks
    lines.append("")
    lines.append("**Delivery options for scheduled tasks:**")
    
    from hermes_constants import display_hermes_home

    # Origin delivery
    if context.source.platform == Platform.LOCAL:
        lines.append("- `\"origin\"` → Local output (saved to files)")
    else:
        _origin_label = context.source.chat_name or (
            _hash_chat_id(context.source.chat_id) if redact_pii else context.source.chat_id
        )
        lines.append(f"- `\"origin\"` → Back to this chat ({_origin_label})")

    # Local always available
    lines.append(
        f"- `\"local\"` → Save to local files only ({display_hermes_home()}/cron/output/)"
    )
    
    # Platform home channels
    for platform, home in context.home_channels.items():
        lines.append(f"- `\"{platform.value}\"` → Home channel ({home.name})")
    
    # Note about explicit targeting
    lines.append("")
    lines.append("*For explicit targeting, use `\"platform:chat_id\"` format if the user provides a specific chat ID.*")
    
    return "\n".join(lines)


@dataclass
class SessionEntry:
    """
    Entry in the session store.
    
    Maps a session key to its current session ID and metadata.
    """
    session_key: str
    session_id: str
    created_at: datetime
    updated_at: datetime
    
    # Origin metadata for delivery routing
    origin: Optional[SessionSource] = None
    
    # Display metadata
    display_name: Optional[str] = None
    platform: Optional[Platform] = None
    chat_type: str = "dm"
    
    # Token tracking
    input_tokens: int = 0
    output_tokens: int = 0
    cache_read_tokens: int = 0
    cache_write_tokens: int = 0
    total_tokens: int = 0
    estimated_cost_usd: float = 0.0
    cost_status: str = "unknown"
    
    # Last API-reported prompt tokens (for accurate compression pre-check)
    last_prompt_tokens: int = 0
    
    # Set when a session was created because the previous one expired;
    # consumed once by the message handler to inject a notice into context
    was_auto_reset: bool = False
    auto_reset_reason: Optional[str] = None  # "idle" or "daily"
    reset_had_activity: bool = False  # whether the expired session had any messages
    
    # Set by the background expiry watcher after it finalizes an expired
    # session (invoking on_session_finalize hooks and evicting the cached
    # agent).  Persisted to sessions.json so the flag survives gateway
    # restarts — prevents redundant finalization runs.
    expiry_finalized: bool = False

    # When True the next call to get_or_create_session() will auto-reset
    # this session (create a new session_id) so the user starts fresh.
    # Set by /stop to break stuck-resume loops (#7536).
    suspended: bool = False

    # When True the session was interrupted by a gateway restart/shutdown
    # drain timeout, but recovery is still expected.  Unlike ``suspended``,
    # ``resume_pending`` preserves the existing session_id on next access —
    # the user stays on the same transcript and the agent auto-continues
    # from where it left off.  Cleared after the next successful turn.
    # Escalation to ``suspended`` is handled by the existing
    # ``.restart_failure_counts`` stuck-loop counter (#7536), not by a
    # parallel counter on this entry.
    resume_pending: bool = False
    resume_reason: Optional[str] = None  # e.g. "restart_timeout"
    last_resume_marked_at: Optional[datetime] = None

    def to_dict(self) -> Dict[str, Any]:
        result = {
            "session_key": self.session_key,
            "session_id": self.session_id,
            "created_at": self.created_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
            "display_name": self.display_name,
            "platform": self.platform.value if self.platform else None,
            "chat_type": self.chat_type,
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "cache_read_tokens": self.cache_read_tokens,
            "cache_write_tokens": self.cache_write_tokens,
            "total_tokens": self.total_tokens,
            "last_prompt_tokens": self.last_prompt_tokens,
            "estimated_cost_usd": self.estimated_cost_usd,
            "cost_status": self.cost_status,
            "expiry_finalized": self.expiry_finalized,
            "suspended": self.suspended,
            "resume_pending": self.resume_pending,
            "resume_reason": self.resume_reason,
            "last_resume_marked_at": (
                self.last_resume_marked_at.isoformat()
                if self.last_resume_marked_at
                else None
            ),
        }
        if self.origin:
            result["origin"] = self.origin.to_dict()
        return result
    
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SessionEntry":
        origin = None
        if "origin" in data and data["origin"]:
            origin = SessionSource.from_dict(data["origin"])
        
        platform = None
        if data.get("platform"):
            try:
                platform = Platform(data["platform"])
            except ValueError as e:
                logger.debug("Unknown platform value %r: %s", data["platform"], e)

        last_resume_marked_at = None
        _lrma = data.get("last_resume_marked_at")
        if _lrma:
            try:
                last_resume_marked_at = datetime.fromisoformat(_lrma)
            except (TypeError, ValueError):
                last_resume_marked_at = None

        return cls(
            session_key=data["session_key"],
            session_id=data["session_id"],
            created_at=datetime.fromisoformat(data["created_at"]),
            updated_at=datetime.fromisoformat(data["updated_at"]),
            origin=origin,
            display_name=data.get("display_name"),
            platform=platform,
            chat_type=data.get("chat_type", "dm"),
            input_tokens=data.get("input_tokens", 0),
            output_tokens=data.get("output_tokens", 0),
            cache_read_tokens=data.get("cache_read_tokens", 0),
            cache_write_tokens=data.get("cache_write_tokens", 0),
            total_tokens=data.get("total_tokens", 0),
            last_prompt_tokens=data.get("last_prompt_tokens", 0),
            estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
            cost_status=data.get("cost_status", "unknown"),
            expiry_finalized=data.get("expiry_finalized", data.get("memory_flushed", False)),
            suspended=data.get("suspended", False),
            resume_pending=data.get("resume_pending", False),
            resume_reason=data.get("resume_reason"),
            last_resume_marked_at=last_resume_marked_at,
        )


def is_shared_multi_user_session(
    source: SessionSource,
    *,
    group_sessions_per_user: bool = True,
    thread_sessions_per_user: bool = False,
) -> bool:
    """Return True when a non-DM session is shared across participants.

    Mirrors the isolation rules in :func:`build_session_key`:
      - DMs are never shared.
      - Threads are shared unless ``thread_sessions_per_user`` is True.
      - Non-thread group/channel sessions are shared unless
        ``group_sessions_per_user`` is True (default: True = isolated).
    """
    if source.chat_type == "dm":
        return False
    if source.thread_id:
        return not thread_sessions_per_user
    return not group_sessions_per_user


def build_session_key(
    source: SessionSource,
    group_sessions_per_user: bool = True,
    thread_sessions_per_user: bool = False,
) -> str:
    """Build a deterministic session key from a message source.

    This is the single source of truth for session key construction.

    DM rules:
      - DMs include chat_id when present, so each private conversation is isolated.
      - thread_id further differentiates threaded DMs within the same DM chat.
      - Without chat_id, thread_id is used as a best-effort fallback.
      - Without thread_id or chat_id, DMs share a single session.

    Group/channel rules:
      - chat_id identifies the parent group/channel.
      - user_id/user_id_alt isolates participants within that parent chat when available when
        ``group_sessions_per_user`` is enabled.
      - thread_id differentiates threads within that parent chat.  When
        ``thread_sessions_per_user`` is False (default), threads are *shared* across all
        participants — user_id is NOT appended, so every user in the thread
        shares a single session.  This is the expected UX for threaded
        conversations (Telegram forum topics, Discord threads, Slack threads).
      - Without participant identifiers, or when isolation is disabled, messages fall back to one
        shared session per chat.
      - Without identifiers, messages fall back to one session per platform/chat_type.
    """
    platform = source.platform.value
    if source.chat_type == "dm":
        dm_chat_id = source.chat_id
        if source.platform == Platform.WHATSAPP:
            dm_chat_id = canonical_whatsapp_identifier(source.chat_id)

        if dm_chat_id:
            if source.thread_id:
                return f"agent:main:{platform}:dm:{dm_chat_id}:{source.thread_id}"
            return f"agent:main:{platform}:dm:{dm_chat_id}"
        if source.thread_id:
            return f"agent:main:{platform}:dm:{source.thread_id}"
        return f"agent:main:{platform}:dm"

    participant_id = source.user_id_alt or source.user_id
    if participant_id and source.platform == Platform.WHATSAPP:
        # Same JID/LID-flip bug as the DM case: without canonicalisation, a
        # single group member gets two isolated per-user sessions when the
        # bridge reshuffles alias forms.
        participant_id = canonical_whatsapp_identifier(str(participant_id)) or participant_id
    key_parts = ["agent:main", platform, source.chat_type]

    if source.chat_id:
        key_parts.append(source.chat_id)
    if source.thread_id:
        key_parts.append(source.thread_id)

    # In threads, default to shared sessions (all participants see the same
    # conversation).  Per-user isolation only applies when explicitly enabled
    # via thread_sessions_per_user, or when there is no thread (regular group).
    isolate_user = group_sessions_per_user
    if source.thread_id and not thread_sessions_per_user:
        isolate_user = False

    if isolate_user and participant_id:
        key_parts.append(str(participant_id))

    return ":".join(key_parts)


class SessionStore:
    """
    Manages session storage and retrieval.
    
    Uses SQLite (via SessionDB) for session metadata and message transcripts.
    Falls back to legacy JSONL files if SQLite is unavailable.
    """
    
    def __init__(self, sessions_dir: Path, config: GatewayConfig,
                 has_active_processes_fn=None):
        self.sessions_dir = sessions_dir
        self.config = config
        self._entries: Dict[str, SessionEntry] = {}
        self._loaded = False
        self._lock = threading.Lock()
        self._has_active_processes_fn = has_active_processes_fn
        
        # Initialize SQLite session database
        self._db = None
        try:
            from hermes_state import SessionDB
            self._db = SessionDB()
        except Exception as e:
            print(f"[gateway] Warning: SQLite session store unavailable, falling back to JSONL: {e}")
    
    def _ensure_loaded(self) -> None:
        """Load sessions index from disk if not already loaded."""
        with self._lock:
            self._ensure_loaded_locked()

    def _ensure_loaded_locked(self) -> None:
        """Load sessions index from disk. Must be called with self._lock held."""
        if self._loaded:
            return

        self.sessions_dir.mkdir(parents=True, exist_ok=True)
        sessions_file = self.sessions_dir / "sessions.json"

        if sessions_file.exists():
            try:
                with open(sessions_file, "r", encoding="utf-8") as f:
                    data = json.load(f)
                    for key, entry_data in data.items():
                        try:
                            self._entries[key] = SessionEntry.from_dict(entry_data)
                        except (ValueError, KeyError):
                            # Skip entries with unknown/removed platform values
                            continue
            except Exception as e:
                print(f"[gateway] Warning: Failed to load sessions: {e}")

        self._loaded = True
    
    def _save(self) -> None:
        """Save sessions index to disk (kept for session key -> ID mapping)."""
        import tempfile
        self.sessions_dir.mkdir(parents=True, exist_ok=True)
        sessions_file = self.sessions_dir / "sessions.json"

        data = {key: entry.to_dict() for key, entry in self._entries.items()}
        fd, tmp_path = tempfile.mkstemp(
            dir=str(self.sessions_dir), suffix=".tmp", prefix=".sessions_"
        )
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)
                f.flush()
                os.fsync(f.fileno())
            os.replace(tmp_path, sessions_file)
        except BaseException:
            try:
                os.unlink(tmp_path)
            except OSError as e:
                logger.debug("Could not remove temp file %s: %s", tmp_path, e)
            raise
    
    def _generate_session_key(self, source: SessionSource) -> str:
        """Generate a session key from a source."""
        return build_session_key(
            source,
            group_sessions_per_user=getattr(self.config, "group_sessions_per_user", True),
            thread_sessions_per_user=getattr(self.config, "thread_sessions_per_user", False),
        )
    
    def _is_session_expired(self, entry: SessionEntry) -> bool:
        """Check if a session has expired based on its reset policy.
        
        Works from the entry alone — no SessionSource needed.
        Used by the background expiry watcher to proactively flush memories.
        Sessions with active background processes are never considered expired.
        """
        if self._has_active_processes_fn:
            if self._has_active_processes_fn(entry.session_key):
                return False

        policy = self.config.get_reset_policy(
            platform=entry.platform,
            session_type=entry.chat_type,
        )

        if policy.mode == "none":
            return False

        now = _now()

        if policy.mode in ("idle", "both"):
            idle_deadline = entry.updated_at + timedelta(minutes=policy.idle_minutes)
            if now > idle_deadline:
                return True

        if policy.mode in ("daily", "both"):
            today_reset = now.replace(
                hour=policy.at_hour,
                minute=0, second=0, microsecond=0,
            )
            if now.hour < policy.at_hour:
                today_reset -= timedelta(days=1)
            if entry.updated_at < today_reset:
                return True

        return False

    def _should_reset(self, entry: SessionEntry, source: SessionSource) -> Optional[str]:
        """
        Check if a session should be reset based on policy.
        
        Returns the reset reason ("idle" or "daily") if a reset is needed,
        or None if the session is still valid.
        
        Sessions with active background processes are never reset.
        """
        if self._has_active_processes_fn:
            session_key = self._generate_session_key(source)
            if self._has_active_processes_fn(session_key):
                return None

        policy = self.config.get_reset_policy(
            platform=source.platform,
            session_type=source.chat_type
        )
        
        if policy.mode == "none":
            return None
        
        now = _now()
        
        if policy.mode in ("idle", "both"):
            idle_deadline = entry.updated_at + timedelta(minutes=policy.idle_minutes)
            if now > idle_deadline:
                return "idle"
        
        if policy.mode in ("daily", "both"):
            today_reset = now.replace(
                hour=policy.at_hour, 
                minute=0, 
                second=0, 
                microsecond=0
            )
            if now.hour < policy.at_hour:
                today_reset -= timedelta(days=1)
            
            if entry.updated_at < today_reset:
                return "daily"
        
        return None
    
    def has_any_sessions(self) -> bool:
        """Check if any sessions have ever been created (across all platforms).

        Uses the SQLite database as the source of truth because it preserves
        historical session records (ended sessions still count).  The in-memory
        ``_entries`` dict replaces entries on reset, so ``len(_entries)`` would
        stay at 1 for single-platform users — which is the bug this fixes.

        The current session is already in the DB by the time this is called
        (get_or_create_session runs first), so we check ``> 1``.
        """
        if self._db:
            try:
                return self._db.session_count() > 1
            except Exception:
                pass  # fall through to heuristic
        # Fallback: check if sessions.json was loaded with existing data.
        # This covers the rare case where the DB is unavailable.
        with self._lock:
            self._ensure_loaded_locked()
            return len(self._entries) > 1

    def get_or_create_session(
        self,
        source: SessionSource,
        force_new: bool = False
    ) -> SessionEntry:
        """
        Get an existing session or create a new one.

        Evaluates reset policy to determine if the existing session is stale.
        Creates a session record in SQLite when a new session starts.
        """
        session_key = self._generate_session_key(source)
        now = _now()

        # SQLite calls are made outside the lock to avoid holding it during I/O.
        # All _entries / _loaded mutations are protected by self._lock.
        db_end_session_id = None
        db_create_kwargs = None

        with self._lock:
            self._ensure_loaded_locked()

            if session_key in self._entries and not force_new:
                entry = self._entries[session_key]

                # Auto-reset sessions marked as suspended (e.g. after /stop
                # broke a stuck loop — #7536).  ``suspended`` is the hard
                # forced-wipe signal and always wins over ``resume_pending``,
                # so repeated interrupted restarts that escalate via the
                # existing ``.restart_failure_counts`` stuck-loop counter
                # still converge to a clean slate.
                if entry.suspended:
                    reset_reason = "suspended"
                elif entry.resume_pending:
                    # Restart-interrupted session: preserve the session_id
                    # and return the existing entry so the transcript
                    # reloads intact.  ``resume_pending`` is cleared after
                    # the NEXT successful turn completes (not here), which
                    # means a re-interrupted retry keeps trying — the
                    # stuck-loop counter handles terminal escalation.
                    entry.updated_at = now
                    self._save()
                    return entry
                else:
                    reset_reason = self._should_reset(entry, source)
                if not reset_reason:
                    entry.updated_at = now
                    self._save()
                    return entry
                else:
                    # Session is being auto-reset.
                    was_auto_reset = True
                    auto_reset_reason = reset_reason
                    # Track whether the expired session had any real conversation
                    reset_had_activity = entry.total_tokens > 0
                    db_end_session_id = entry.session_id
            else:
                was_auto_reset = False
                auto_reset_reason = None
                reset_had_activity = False

            # Create new session
            session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"

            entry = SessionEntry(
                session_key=session_key,
                session_id=session_id,
                created_at=now,
                updated_at=now,
                origin=source,
                display_name=source.chat_name,
                platform=source.platform,
                chat_type=source.chat_type,
                was_auto_reset=was_auto_reset,
                auto_reset_reason=auto_reset_reason,
                reset_had_activity=reset_had_activity,
            )

            self._entries[session_key] = entry
            self._save()
            db_create_kwargs = {
                "session_id": session_id,
                "source": source.platform.value,
                "user_id": source.user_id,
            }

        # SQLite operations outside the lock
        if self._db and db_end_session_id:
            try:
                self._db.end_session(db_end_session_id, "session_reset")
            except Exception as e:
                logger.debug("Session DB operation failed: %s", e)

        if self._db and db_create_kwargs:
            try:
                self._db.create_session(**db_create_kwargs)
            except Exception as e:
                print(f"[gateway] Warning: Failed to create SQLite session: {e}")

        return entry

    def update_session(
        self,
        session_key: str,
        last_prompt_tokens: int = None,
    ) -> None:
        """Update lightweight session metadata after an interaction."""
        with self._lock:
            self._ensure_loaded_locked()

            if session_key in self._entries:
                entry = self._entries[session_key]
                entry.updated_at = _now()
                if last_prompt_tokens is not None:
                    entry.last_prompt_tokens = last_prompt_tokens
                self._save()

    def suspend_session(self, session_key: str) -> bool:
        """Mark a session as suspended so it auto-resets on next access.

        Used by ``/stop`` to prevent stuck sessions from being resumed
        after a gateway restart (#7536).  Returns True if the session
        existed and was marked.
        """
        with self._lock:
            self._ensure_loaded_locked()
            if session_key in self._entries:
                self._entries[session_key].suspended = True
                self._save()
                return True
        return False

    def mark_resume_pending(
        self,
        session_key: str,
        reason: str = "restart_timeout",
    ) -> bool:
        """Mark a session as resumable after a restart interruption.

        Unlike ``suspend_session()``, this preserves the existing
        ``session_id`` and the transcript.  The next call to
        ``get_or_create_session()`` for this key returns the same entry
        so the user auto-resumes on the same conversation lane.

        Returns True if the session existed and was marked.
        """
        with self._lock:
            self._ensure_loaded_locked()
            if session_key in self._entries:
                entry = self._entries[session_key]
                # Never override an explicit ``suspended`` — that is a hard
                # forced-wipe signal (from /stop or stuck-loop escalation).
                if entry.suspended:
                    return False
                entry.resume_pending = True
                entry.resume_reason = reason
                entry.last_resume_marked_at = _now()
                self._save()
                return True
        return False

    def clear_resume_pending(self, session_key: str) -> bool:
        """Clear the resume-pending flag after a successful resumed turn.

        Called from the gateway after ``run_conversation()`` returns a
        final response for a session that had ``resume_pending=True``,
        signalling that recovery succeeded.

        Returns True if a flag was cleared.
        """
        with self._lock:
            self._ensure_loaded_locked()
            entry = self._entries.get(session_key)
            if entry is None or not entry.resume_pending:
                return False
            entry.resume_pending = False
            entry.resume_reason = None
            entry.last_resume_marked_at = None
            self._save()
            return True

    def prune_old_entries(self, max_age_days: int) -> int:
        """Drop SessionEntry records older than max_age_days.

        Pruning is based on ``updated_at`` (last activity), not ``created_at``.
        A session that's been active within the window is kept regardless of
        how old it is.  Entries marked ``suspended`` are kept — the user
        explicitly paused them for later resume.  Entries held by an active
        process (via has_active_processes_fn) are also kept so long-running
        background work isn't orphaned.

        Pruning is functionally identical to a natural reset-policy expiry:
        the transcript in SQLite stays, but the session_key → session_id
        mapping is dropped and the user starts a fresh session on return.

        ``max_age_days <= 0`` disables pruning; returns 0 immediately.
        Returns the number of entries removed.
        """
        if max_age_days is None or max_age_days <= 0:
            return 0
        from datetime import timedelta

        cutoff = _now() - timedelta(days=max_age_days)
        removed_keys: list[str] = []

        with self._lock:
            self._ensure_loaded_locked()
            for key, entry in list(self._entries.items()):
                if entry.suspended:
                    continue
                # Never prune sessions with an active background process
                # attached — the user may still be waiting on output.
                # The callback is keyed by session_key (see process_registry.
                # has_active_for_session); passing session_id here used to
                # never match, so active sessions got pruned anyway.
                if self._has_active_processes_fn is not None:
                    try:
                        if self._has_active_processes_fn(entry.session_key):
                            continue
                    except Exception as exc:
                        logger.debug(
                            "has_active_processes_fn raised during prune for %s: %s",
                            entry.session_key, exc,
                        )
                if entry.updated_at < cutoff:
                    removed_keys.append(key)
            for key in removed_keys:
                self._entries.pop(key, None)
            if removed_keys:
                self._save()

        if removed_keys:
            logger.info(
                "SessionStore pruned %d entries older than %d days",
                len(removed_keys), max_age_days,
            )
        return len(removed_keys)

    def suspend_recently_active(self, max_age_seconds: int = 120) -> int:
        """Mark recently-active sessions as suspended.

        Called on gateway startup to prevent sessions that were likely
        in-flight when the gateway last exited from being blindly resumed
        (#7536).  Only suspends sessions updated within *max_age_seconds*
        to avoid resetting long-idle sessions that are harmless to resume.
        Returns the number of sessions that were suspended.

        Entries flagged ``resume_pending=True`` are skipped — those were
        marked intentionally by the drain-timeout path as recoverable.
        Terminal escalation for genuinely stuck ``resume_pending`` sessions
        is handled by the existing ``.restart_failure_counts`` stuck-loop
        counter, which runs after this method on startup.
        """
        from datetime import timedelta

        cutoff = _now() - timedelta(seconds=max_age_seconds)
        count = 0
        with self._lock:
            self._ensure_loaded_locked()
            for entry in self._entries.values():
                if entry.resume_pending:
                    continue
                if not entry.suspended and entry.updated_at >= cutoff:
                    entry.suspended = True
                    count += 1
            if count:
                self._save()
        return count

    def reset_session(self, session_key: str) -> Optional[SessionEntry]:
        """Force reset a session, creating a new session ID."""
        db_end_session_id = None
        db_create_kwargs = None
        new_entry = None

        with self._lock:
            self._ensure_loaded_locked()

            if session_key not in self._entries:
                return None

            old_entry = self._entries[session_key]
            db_end_session_id = old_entry.session_id

            now = _now()
            session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"

            new_entry = SessionEntry(
                session_key=session_key,
                session_id=session_id,
                created_at=now,
                updated_at=now,
                origin=old_entry.origin,
                display_name=old_entry.display_name,
                platform=old_entry.platform,
                chat_type=old_entry.chat_type,
            )

            self._entries[session_key] = new_entry
            self._save()
            db_create_kwargs = {
                "session_id": session_id,
                "source": old_entry.platform.value if old_entry.platform else "unknown",
                "user_id": old_entry.origin.user_id if old_entry.origin else None,
            }

        if self._db and db_end_session_id:
            try:
                self._db.end_session(db_end_session_id, "session_reset")
            except Exception as e:
                logger.debug("Session DB operation failed: %s", e)

        if self._db and db_create_kwargs:
            try:
                self._db.create_session(**db_create_kwargs)
            except Exception as e:
                logger.debug("Session DB operation failed: %s", e)

        return new_entry

    def switch_session(self, session_key: str, target_session_id: str) -> Optional[SessionEntry]:
        """Switch a session key to point at an existing session ID.

        Used by ``/resume`` to restore a previously-named session.
        Ends the current session in SQLite (like reset), but instead of
        generating a fresh session ID, re-uses ``target_session_id`` so the
        old transcript is loaded on the next message. If the target session was
        previously ended, re-open it so gateway resume semantics match the CLI.
        """
        db_end_session_id = None
        new_entry = None

        with self._lock:
            self._ensure_loaded_locked()

            if session_key not in self._entries:
                return None

            old_entry = self._entries[session_key]

            # Don't switch if already on that session
            if old_entry.session_id == target_session_id:
                return old_entry

            db_end_session_id = old_entry.session_id

            now = _now()
            new_entry = SessionEntry(
                session_key=session_key,
                session_id=target_session_id,
                created_at=now,
                updated_at=now,
                origin=old_entry.origin,
                display_name=old_entry.display_name,
                platform=old_entry.platform,
                chat_type=old_entry.chat_type,
            )

            self._entries[session_key] = new_entry
            self._save()

        if self._db and db_end_session_id:
            try:
                self._db.end_session(db_end_session_id, "session_switch")
            except Exception as e:
                logger.debug("Session DB end_session failed: %s", e)

        if self._db:
            try:
                self._db.reopen_session(target_session_id)
            except Exception as e:
                logger.debug("Session DB reopen_session failed: %s", e)

        return new_entry

    def list_sessions(self, active_minutes: Optional[int] = None) -> List[SessionEntry]:
        """List all sessions, optionally filtered by activity."""
        with self._lock:
            self._ensure_loaded_locked()
            entries = list(self._entries.values())

        if active_minutes is not None:
            cutoff = _now() - timedelta(minutes=active_minutes)
            entries = [e for e in entries if e.updated_at >= cutoff]

        entries.sort(key=lambda e: e.updated_at, reverse=True)

        return entries
    
    def get_transcript_path(self, session_id: str) -> Path:
        """Get the path to a session's legacy transcript file."""
        return self.sessions_dir / f"{session_id}.jsonl"
    
    def append_to_transcript(self, session_id: str, message: Dict[str, Any], skip_db: bool = False) -> None:
        """Append a message to a session's transcript (SQLite + legacy JSONL).

        Args:
            skip_db: When True, only write to JSONL and skip the SQLite write.
                     Used when the agent already persisted messages to SQLite
                     via its own _flush_messages_to_session_db(), preventing
                     the duplicate-write bug (#860).
        """
        # Write to SQLite (unless the agent already handled it)
        if self._db and not skip_db:
            try:
                self._db.append_message(
                    session_id=session_id,
                    role=message.get("role", "unknown"),
                    content=message.get("content"),
                    tool_name=message.get("tool_name"),
                    tool_calls=message.get("tool_calls"),
                    tool_call_id=message.get("tool_call_id"),
                    reasoning=message.get("reasoning") if message.get("role") == "assistant" else None,
                    reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None,
                    reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
                    codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
                    codex_message_items=message.get("codex_message_items") if message.get("role") == "assistant" else None,
                )
            except Exception as e:
                logger.debug("Session DB operation failed: %s", e)
        
        # Also write legacy JSONL (keeps existing tooling working during transition)
        transcript_path = self.get_transcript_path(session_id)
        with open(transcript_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(message, ensure_ascii=False) + "\n")
    
    def rewrite_transcript(self, session_id: str, messages: List[Dict[str, Any]]) -> None:
        """Replace the entire transcript for a session with new messages.
        
        Used by /retry, /undo, and /compress to persist modified conversation history.
        Rewrites both SQLite and legacy JSONL storage.
        """
        # SQLite: clear old messages and re-insert
        if self._db:
            try:
                self._db.clear_messages(session_id)
                for msg in messages:
                    role = msg.get("role", "unknown")
                    self._db.append_message(
                        session_id=session_id,
                        role=role,
                        content=msg.get("content"),
                        tool_name=msg.get("tool_name"),
                        tool_calls=msg.get("tool_calls"),
                        tool_call_id=msg.get("tool_call_id"),
                        reasoning=msg.get("reasoning") if role == "assistant" else None,
                        reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
                        reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
                        codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
                        codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
                    )
            except Exception as e:
                logger.debug("Failed to rewrite transcript in DB: %s", e)
        
        # JSONL: overwrite the file
        transcript_path = self.get_transcript_path(session_id)
        with open(transcript_path, "w", encoding="utf-8") as f:
            for msg in messages:
                f.write(json.dumps(msg, ensure_ascii=False) + "\n")

    def load_transcript(self, session_id: str) -> List[Dict[str, Any]]:
        """Load all messages from a session's transcript."""
        db_messages = []
        # Try SQLite first
        if self._db:
            try:
                db_messages = self._db.get_messages_as_conversation(session_id)
            except Exception as e:
                logger.debug("Could not load messages from DB: %s", e)

        # Load legacy JSONL transcript (may contain more history than SQLite
        # for sessions created before the DB layer was introduced).
        transcript_path = self.get_transcript_path(session_id)
        jsonl_messages = []
        if transcript_path.exists():
            with open(transcript_path, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if line:
                        try:
                            jsonl_messages.append(json.loads(line))
                        except json.JSONDecodeError:
                            logger.warning(
                                "Skipping corrupt line in transcript %s: %s",
                                session_id, line[:120],
                            )

        # Prefer whichever source has more messages.
        #
        # Background: when a session pre-dates SQLite storage (or when the DB
        # layer was added while a long-lived session was already active), the
        # first post-migration turn writes only the *new* messages to SQLite
        # (because _flush_messages_to_session_db skips messages already in
        # conversation_history, assuming they're persisted).  On the *next*
        # turn load_transcript returns those few SQLite rows and ignores the
        # full JSONL history — the model sees a context of 1-4 messages instead
        # of hundreds.  Using the longer source prevents this silent truncation.
        if len(jsonl_messages) > len(db_messages):
            if db_messages:
                logger.debug(
                    "Session %s: JSONL has %d messages vs SQLite %d — "
                    "using JSONL (legacy session not yet fully migrated)",
                    session_id, len(jsonl_messages), len(db_messages),
                )
            return jsonl_messages

        return db_messages


def build_session_context(
    source: SessionSource,
    config: GatewayConfig,
    session_entry: Optional[SessionEntry] = None
) -> SessionContext:
    """
    Build a full session context from a source and config.
    
    This is used to inject context into the agent's system prompt.
    """
    connected = config.get_connected_platforms()
    
    home_channels = {}
    for platform in connected:
        home = config.get_home_channel(platform)
        if home:
            home_channels[platform] = home
    
    context = SessionContext(
        source=source,
        connected_platforms=connected,
        home_channels=home_channels,
        shared_multi_user_session=is_shared_multi_user_session(
            source,
            group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
            thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False),
        ),
    )
    
    if session_entry:
        context.session_key = session_entry.session_key
        context.session_id = session_entry.session_id
        context.created_at = session_entry.created_at
        context.updated_at = session_entry.updated_at
    
    return context
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								"""
 								Session management for the gateway.
 								Handles:
 								- Session context tracking (where messages come from)
 								- Session storage (conversations persisted to disk)
 								- Reset policy evaluation (when to start fresh)
 								- Dynamic system prompt injection (agent knows its context)
 								"""
-												feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

											
										
										
											2026-03-16 05:48:45 -07:00
+								import hashlib
-												refactor: enhance error handling with structured logging across multiple modules

- Updated various modules including cli.py, run_agent.py, gateway, and tools to replace silent exception handling with structured logging.
- Improved error messages to provide more context, aiding in debugging and monitoring.
- Ensured consistent logging practices throughout the codebase, enhancing traceability and maintainability.

											
										
										
											2026-02-21 03:32:11 -08:00
+								import logging
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								import os
 								import json
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								import threading
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								import uuid
 								from pathlib import Path
 								from datetime import datetime, timedelta
-												chore: remove ~100 unused imports across 55 files (#3016)

Automated cleanup via pyflakes + autoflake with manual review.

Changes:
- Removed unused stdlib imports (os, sys, json, pathlib.Path, etc.)
- Removed unused typing imports (List, Dict, Any, Optional, Tuple, Set, etc.)
- Removed unused internal imports (hermes_cli.auth, hermes_cli.config, etc.)
- Fixed cli.py: removed 8 shadowed banner imports (imported from hermes_cli.banner
  then immediately redefined locally — only build_welcome_banner is actually used)
- Added noqa comments to imports that appear unused but serve a purpose:
  - Re-exports (gateway/session.py SessionResetPolicy, tools/terminal_tool.py
    is_interrupted/_interrupt_event)
  - SDK presence checks in try/except (daytona, fal_client, discord)
  - Test mock targets (auxiliary_client.py Path, mcp_config.py get_hermes_home)

Zero behavioral changes. Full test suite passes (6162/6162, 2 pre-existing
streaming test failures unrelated to this change).
											
										
										
											2026-03-25 15:02:03 -07:00
+								from dataclasses import dataclass
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								from typing import Dict, List, Optional, Any
-												refactor: enhance error handling with structured logging across multiple modules

- Updated various modules including cli.py, run_agent.py, gateway, and tools to replace silent exception handling with structured logging.
- Improved error messages to provide more context, aiding in debugging and monitoring.
- Ensured consistent logging practices throughout the codebase, enhancing traceability and maintainability.

											
										
										
											2026-02-21 03:32:11 -08:00
+								logger = logging.getLogger(__name__)
-												feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

											
										
										
											2026-03-16 05:48:45 -07:00
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								def _now() -> datetime:
 								    """Return the current local time."""
 								    return datetime.now()
-												feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

											
										
										
											2026-03-16 05:48:45 -07:00
+								# ---------------------------------------------------------------------------
 								# PII redaction helpers
 								# ---------------------------------------------------------------------------
 								def _hash_id(value: str) -> str:
 								    """Deterministic 12-char hex hash of an identifier."""
 								    return hashlib.sha256(value.encode("utf-8")).hexdigest()[:12]
 								def _hash_sender_id(value: str) -> str:
 								    """Hash a sender ID to ``user_<12hex>``."""
 								    return f"user_{_hash_id(value)}"
 								def _hash_chat_id(value: str) -> str:
 								    """Hash the numeric portion of a chat ID, preserving platform prefix.
 								    ``telegram:12345`` → ``telegram:<hash>``
 								    ``12345``          → ``<hash>``
 								    """
 								    colon = value.find(":")
 								    if colon > 0:
 								        prefix = value[:colon]
 								        return f"{prefix}:{_hash_id(value[colon + 1:])}"
 								    return _hash_id(value)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								from .config import (
 								    Platform,
 								    GatewayConfig,
-												chore: remove ~100 unused imports across 55 files (#3016)

Automated cleanup via pyflakes + autoflake with manual review.

Changes:
- Removed unused stdlib imports (os, sys, json, pathlib.Path, etc.)
- Removed unused typing imports (List, Dict, Any, Optional, Tuple, Set, etc.)
- Removed unused internal imports (hermes_cli.auth, hermes_cli.config, etc.)
- Fixed cli.py: removed 8 shadowed banner imports (imported from hermes_cli.banner
  then immediately redefined locally — only build_welcome_banner is actually used)
- Added noqa comments to imports that appear unused but serve a purpose:
  - Re-exports (gateway/session.py SessionResetPolicy, tools/terminal_tool.py
    is_interrupted/_interrupt_event)
  - SDK presence checks in try/except (daytona, fal_client, discord)
  - Test mock targets (auxiliary_client.py Path, mcp_config.py get_hermes_home)

Zero behavioral changes. Full test suite passes (6162/6162, 2 pre-existing
streaming test failures unrelated to this change).
											
										
										
											2026-03-25 15:02:03 -07:00
+								    SessionResetPolicy,  # noqa: F401 — re-exported via gateway/__init__.py
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    HomeChannel,
 								)
-												refactor(gateway): extract WhatsApp identity helpers into shared module

Follow-up to the canonical-identity session-key fix: pull the
JID/LID normalize/expand/canonical helpers into gateway/whatsapp_identity.py
instead of living in two places. gateway/session.py (session-key build) and
gateway/run.py (authorisation allowlist) now both import from the shared
module, so the two resolution paths can't drift apart.

Also switches the auth path from module-level _hermes_home (cached at
import time) to dynamic get_hermes_home() lookup, which matches the
session-key path and correctly reflects HERMES_HOME env overrides. The
lone test that monkeypatched gateway.run._hermes_home for the WhatsApp
auth path is updated to set HERMES_HOME env var instead; all other
tests that monkeypatch _hermes_home for unrelated paths (update,
restart drain, shutdown marker, etc.) still work — the module-level
_hermes_home is untouched.

											
										
										
											2026-04-24 07:42:00 -07:00
+								from .whatsapp_identity import (
 								    canonical_whatsapp_identifier,
 								    normalize_whatsapp_identifier,
 								)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								@dataclass
 								class SessionSource:
 								    """
 								    Describes where a message originated from.
 								    This information is used to:
 . Route responses back to the right place
 . Inject context into the system prompt
 . Track origin for cron job delivery
 								    """
 								    platform: Platform
 								    chat_id: str
 								    chat_name: Optional[str] = None
 								    chat_type: str = "dm"  # "dm", "group", "channel", "thread"
 								    user_id: Optional[str] = None
 								    user_name: Optional[str] = None
 								    thread_id: Optional[str] = None  # For forum topics, Discord threads, etc.
-												feat(gateway): include Discord channel topic in session context

Fixes #163

- Add chat_topic field to SessionSource dataclass
- Update to_dict/from_dict for serialization support
- Add chat_topic parameter to build_source helper
- Extract channel.topic in Discord adapter for messages and slash commands
- Display Channel Topic in system prompt when available
- Normalize empty topics to None

											
										
										
											2026-03-01 03:48:24 -05:00
+								    chat_topic: Optional[str] = None  # Channel topic/description (Discord, Slack)
-												fix(feishu): correct identity model docs and prefer tenant-scoped user_id

Feishu's open_id is app-scoped (same user gets different open_ids per
bot app), not a canonical identity. Functionally correct for single-bot
mode but semantically misleading.

- Add comprehensive Feishu identity model documentation to module docstring
- Prefer user_id (tenant-scoped) over open_id (app-scoped) in
  _resolve_sender_profile when both are available
- Document bot_open_id usage for @mention matching
- Update user_id_alt comment in SessionSource to be platform-generic

Ref: closes analysis from PR #8388 (closed as over-scoped)

											
										
										
											2026-04-13 03:30:38 -07:00
+								    user_id_alt: Optional[str] = None  # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
-												feat: add Signal messenger gateway platform (#405)

Complete Signal adapter using signal-cli daemon HTTP API.
Based on PR #268 by ibhagwan, rebuilt on current main with bug fixes.

Architecture:
- SSE streaming for inbound messages with exponential backoff (2s→60s)
- JSON-RPC 2.0 for outbound (send, typing, attachments, contacts)
- Health monitor detects stale SSE connections (120s threshold)
- Phone number redaction in all logs and global redact.py

Features:
- DM and group message support with separate access policies
- DM policies: pairing (default), allowlist, open
- Group policies: disabled (default), allowlist, open
- Attachment download with magic-byte type detection
- Typing indicators (8s refresh interval)
- 100MB attachment size limit, 8000 char message limit
- E.164 phone + UUID allowlist support

Integration:
- Platform.SIGNAL enum in gateway/config.py
- Signal in _is_user_authorized() allowlist maps (gateway/run.py)
- Adapter factory in _create_adapter() (gateway/run.py)
- user_id_alt/chat_id_alt fields in SessionSource for UUIDs
- send_message tool support via httpx JSON-RPC (not aiohttp)
- Interactive setup wizard in 'hermes gateway setup'
- Connectivity testing during setup (pings /api/v1/check)
- signal-cli detection and install guidance

Bug fixes from PR #268:
- Timestamp reads from envelope_data (not outer wrapper)
- Uses httpx consistently (not aiohttp in send_message tool)
- SIGNAL_DEBUG scoped to signal logger (not root)
- extract_images regex NOT modified (preserves group numbering)
- pairing.py NOT modified (no cross-platform side effects)
- No dual authorization (adapter defers to run.py for user auth)
- Wildcard uses set membership ('*' in set, not list equality)
- .zip default for PK magic bytes (not .docx)

No new Python dependencies — uses httpx (already core).
External requirement: signal-cli daemon (user-installed).

Tests: 30 new tests covering config, init, helpers, session source,
phone redaction, authorization, and send_message integration.

Co-authored-by: ibhagwan <ibhagwan@users.noreply.github.com>

											
										
										
											2026-03-08 20:20:35 -07:00
+								    chat_id_alt: Optional[str] = None  # Signal group internal ID
-												fix(discord): DISCORD_ALLOW_BOTS=mentions/all now works without DISCORD_ALLOWED_USERS

Fixes #4466.

Root cause: two sequential authorization gates both independently rejected
bot messages, making DISCORD_ALLOW_BOTS completely ineffective.

Gate 1 — `discord.py` `on_message`:
    _is_allowed_user ran BEFORE the bot filter, so bot senders were dropped
    before the DISCORD_ALLOW_BOTS policy was ever evaluated.

Gate 2 — `gateway/run.py` _is_user_authorized:
    The gateway-level allowlist check rejected bot IDs with 'Unauthorized
    user: <bot_id>' even if they passed Gate 1.

Fix:

  gateway/platforms/discord.py — reorder on_message so DISCORD_ALLOW_BOTS
  runs BEFORE _is_allowed_user. Bots permitted by the filter skip the
  user allowlist; non-bots are still checked.

  gateway/session.py — add is_bot: bool = False to SessionSource so the
  gateway layer can distinguish bot senders.

  gateway/platforms/base.py — expose is_bot parameter in build_source.

  gateway/platforms/discord.py _handle_message — set is_bot=True when
  building the SessionSource for bot authors.

  gateway/run.py _is_user_authorized — when source.is_bot is True AND
  DISCORD_ALLOW_BOTS is 'mentions' or 'all', return True early. Platform
  filter already validated the message at on_message; don't re-reject.

Behavior matrix:

  | Config                                     | Before  | After   |
  | DISCORD_ALLOW_BOTS=none (default)          | Blocked | Blocked |
  | DISCORD_ALLOW_BOTS=all                     | Blocked | Allowed |
  | DISCORD_ALLOW_BOTS=mentions + @mention     | Blocked | Allowed |
  | DISCORD_ALLOW_BOTS=mentions, no mention    | Blocked | Blocked |
  | Human in DISCORD_ALLOWED_USERS             | Allowed | Allowed |
  | Human NOT in DISCORD_ALLOWED_USERS         | Blocked | Blocked |

Co-authored-by: Hermes Maintainer <hermes@nousresearch.com>

											
										
										
											2026-04-17 05:41:19 -07:00
+								    is_bot: bool = False  # True when the message author is a bot/webhook (Discord)
-												feat(session): add guild_id/parent_chat_id/message_id to SessionSource

Groundwork for injecting raw platform identifiers into the agent's
system prompt.  Currently only `thread_id` is exposed as a raw ID —
callers in a Discord thread had to guess `channel_id == thread_id`
(which happens to work because threads are channels in Discord's REST
API) and had no way to reference the parent channel, guild, or the
triggering message.

Adds three optional fields:

- `guild_id` — Discord guild / Slack workspace / Matrix server scope
- `parent_chat_id` — parent channel when chat_id refers to a thread
- `message_id` — ID of the triggering message (pin/reply/react)

Extends `BasePlatformAdapter.build_source()` to accept + forward them
and teaches `to_dict`/`from_dict` to serialize them.  Behaviourally a
no-op: nothing reads the fields yet and they default to None.

											
										
										
											2026-04-25 00:09:12 +05:30
+								    guild_id: Optional[str] = None  # Discord guild / Slack workspace / Matrix server scope
 								    parent_chat_id: Optional[str] = None  # Parent channel when chat_id refers to a thread
 								    message_id: Optional[str] = None  # ID of the triggering message (for pin/reply/react)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								    @property
 								    def description(self) -> str:
 								        """Human-readable description of the source."""
 								        if self.platform == Platform.LOCAL:
 								            return "CLI terminal"
 								        parts = []
 								        if self.chat_type == "dm":
 								            parts.append(f"DM with {self.user_name or self.user_id or 'user'}")
 								        elif self.chat_type == "group":
 								            parts.append(f"group: {self.chat_name or self.chat_id}")
 								        elif self.chat_type == "channel":
 								            parts.append(f"channel: {self.chat_name or self.chat_id}")
 								        else:
 								            parts.append(self.chat_name or self.chat_id)
 								        if self.thread_id:
 								            parts.append(f"thread: {self.thread_id}")
 								        return ", ".join(parts)
 								    def to_dict(self) -> Dict[str, Any]:
-												feat: add Signal messenger gateway platform (#405)

Complete Signal adapter using signal-cli daemon HTTP API.
Based on PR #268 by ibhagwan, rebuilt on current main with bug fixes.

Architecture:
- SSE streaming for inbound messages with exponential backoff (2s→60s)
- JSON-RPC 2.0 for outbound (send, typing, attachments, contacts)
- Health monitor detects stale SSE connections (120s threshold)
- Phone number redaction in all logs and global redact.py

Features:
- DM and group message support with separate access policies
- DM policies: pairing (default), allowlist, open
- Group policies: disabled (default), allowlist, open
- Attachment download with magic-byte type detection
- Typing indicators (8s refresh interval)
- 100MB attachment size limit, 8000 char message limit
- E.164 phone + UUID allowlist support

Integration:
- Platform.SIGNAL enum in gateway/config.py
- Signal in _is_user_authorized() allowlist maps (gateway/run.py)
- Adapter factory in _create_adapter() (gateway/run.py)
- user_id_alt/chat_id_alt fields in SessionSource for UUIDs
- send_message tool support via httpx JSON-RPC (not aiohttp)
- Interactive setup wizard in 'hermes gateway setup'
- Connectivity testing during setup (pings /api/v1/check)
- signal-cli detection and install guidance

Bug fixes from PR #268:
- Timestamp reads from envelope_data (not outer wrapper)
- Uses httpx consistently (not aiohttp in send_message tool)
- SIGNAL_DEBUG scoped to signal logger (not root)
- extract_images regex NOT modified (preserves group numbering)
- pairing.py NOT modified (no cross-platform side effects)
- No dual authorization (adapter defers to run.py for user auth)
- Wildcard uses set membership ('*' in set, not list equality)
- .zip default for PK magic bytes (not .docx)

No new Python dependencies — uses httpx (already core).
External requirement: signal-cli daemon (user-installed).

Tests: 30 new tests covering config, init, helpers, session source,
phone redaction, authorization, and send_message integration.

Co-authored-by: ibhagwan <ibhagwan@users.noreply.github.com>

											
										
										
											2026-03-08 20:20:35 -07:00
+								        d = {
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            "platform": self.platform.value,
 								            "chat_id": self.chat_id,
 								            "chat_name": self.chat_name,
 								            "chat_type": self.chat_type,
 								            "user_id": self.user_id,
 								            "user_name": self.user_name,
 								            "thread_id": self.thread_id,
-												feat(gateway): include Discord channel topic in session context

Fixes #163

- Add chat_topic field to SessionSource dataclass
- Update to_dict/from_dict for serialization support
- Add chat_topic parameter to build_source helper
- Extract channel.topic in Discord adapter for messages and slash commands
- Display Channel Topic in system prompt when available
- Normalize empty topics to None

											
										
										
											2026-03-01 03:48:24 -05:00
+								            "chat_topic": self.chat_topic,
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        }
-												feat: add Signal messenger gateway platform (#405)

Complete Signal adapter using signal-cli daemon HTTP API.
Based on PR #268 by ibhagwan, rebuilt on current main with bug fixes.

Architecture:
- SSE streaming for inbound messages with exponential backoff (2s→60s)
- JSON-RPC 2.0 for outbound (send, typing, attachments, contacts)
- Health monitor detects stale SSE connections (120s threshold)
- Phone number redaction in all logs and global redact.py

Features:
- DM and group message support with separate access policies
- DM policies: pairing (default), allowlist, open
- Group policies: disabled (default), allowlist, open
- Attachment download with magic-byte type detection
- Typing indicators (8s refresh interval)
- 100MB attachment size limit, 8000 char message limit
- E.164 phone + UUID allowlist support

Integration:
- Platform.SIGNAL enum in gateway/config.py
- Signal in _is_user_authorized() allowlist maps (gateway/run.py)
- Adapter factory in _create_adapter() (gateway/run.py)
- user_id_alt/chat_id_alt fields in SessionSource for UUIDs
- send_message tool support via httpx JSON-RPC (not aiohttp)
- Interactive setup wizard in 'hermes gateway setup'
- Connectivity testing during setup (pings /api/v1/check)
- signal-cli detection and install guidance

Bug fixes from PR #268:
- Timestamp reads from envelope_data (not outer wrapper)
- Uses httpx consistently (not aiohttp in send_message tool)
- SIGNAL_DEBUG scoped to signal logger (not root)
- extract_images regex NOT modified (preserves group numbering)
- pairing.py NOT modified (no cross-platform side effects)
- No dual authorization (adapter defers to run.py for user auth)
- Wildcard uses set membership ('*' in set, not list equality)
- .zip default for PK magic bytes (not .docx)

No new Python dependencies — uses httpx (already core).
External requirement: signal-cli daemon (user-installed).

Tests: 30 new tests covering config, init, helpers, session source,
phone redaction, authorization, and send_message integration.

Co-authored-by: ibhagwan <ibhagwan@users.noreply.github.com>

											
										
										
											2026-03-08 20:20:35 -07:00
+								        if self.user_id_alt:
 								            d["user_id_alt"] = self.user_id_alt
 								        if self.chat_id_alt:
 								            d["chat_id_alt"] = self.chat_id_alt
-												feat(session): add guild_id/parent_chat_id/message_id to SessionSource

Groundwork for injecting raw platform identifiers into the agent's
system prompt.  Currently only `thread_id` is exposed as a raw ID —
callers in a Discord thread had to guess `channel_id == thread_id`
(which happens to work because threads are channels in Discord's REST
API) and had no way to reference the parent channel, guild, or the
triggering message.

Adds three optional fields:

- `guild_id` — Discord guild / Slack workspace / Matrix server scope
- `parent_chat_id` — parent channel when chat_id refers to a thread
- `message_id` — ID of the triggering message (pin/reply/react)

Extends `BasePlatformAdapter.build_source()` to accept + forward them
and teaches `to_dict`/`from_dict` to serialize them.  Behaviourally a
no-op: nothing reads the fields yet and they default to None.

											
										
										
											2026-04-25 00:09:12 +05:30
+								        if self.guild_id:
 								            d["guild_id"] = self.guild_id
 								        if self.parent_chat_id:
 								            d["parent_chat_id"] = self.parent_chat_id
 								        if self.message_id:
 								            d["message_id"] = self.message_id
-												feat: add Signal messenger gateway platform (#405)

Complete Signal adapter using signal-cli daemon HTTP API.
Based on PR #268 by ibhagwan, rebuilt on current main with bug fixes.

Architecture:
- SSE streaming for inbound messages with exponential backoff (2s→60s)
- JSON-RPC 2.0 for outbound (send, typing, attachments, contacts)
- Health monitor detects stale SSE connections (120s threshold)
- Phone number redaction in all logs and global redact.py

Features:
- DM and group message support with separate access policies
- DM policies: pairing (default), allowlist, open
- Group policies: disabled (default), allowlist, open
- Attachment download with magic-byte type detection
- Typing indicators (8s refresh interval)
- 100MB attachment size limit, 8000 char message limit
- E.164 phone + UUID allowlist support

Integration:
- Platform.SIGNAL enum in gateway/config.py
- Signal in _is_user_authorized() allowlist maps (gateway/run.py)
- Adapter factory in _create_adapter() (gateway/run.py)
- user_id_alt/chat_id_alt fields in SessionSource for UUIDs
- send_message tool support via httpx JSON-RPC (not aiohttp)
- Interactive setup wizard in 'hermes gateway setup'
- Connectivity testing during setup (pings /api/v1/check)
- signal-cli detection and install guidance

Bug fixes from PR #268:
- Timestamp reads from envelope_data (not outer wrapper)
- Uses httpx consistently (not aiohttp in send_message tool)
- SIGNAL_DEBUG scoped to signal logger (not root)
- extract_images regex NOT modified (preserves group numbering)
- pairing.py NOT modified (no cross-platform side effects)
- No dual authorization (adapter defers to run.py for user auth)
- Wildcard uses set membership ('*' in set, not list equality)
- .zip default for PK magic bytes (not .docx)

No new Python dependencies — uses httpx (already core).
External requirement: signal-cli daemon (user-installed).

Tests: 30 new tests covering config, init, helpers, session source,
phone redaction, authorization, and send_message integration.

Co-authored-by: ibhagwan <ibhagwan@users.noreply.github.com>

											
										
										
											2026-03-08 20:20:35 -07:00
+								        return d
-												feat(session): add guild_id/parent_chat_id/message_id to SessionSource

Groundwork for injecting raw platform identifiers into the agent's
system prompt.  Currently only `thread_id` is exposed as a raw ID —
callers in a Discord thread had to guess `channel_id == thread_id`
(which happens to work because threads are channels in Discord's REST
API) and had no way to reference the parent channel, guild, or the
triggering message.

Adds three optional fields:

- `guild_id` — Discord guild / Slack workspace / Matrix server scope
- `parent_chat_id` — parent channel when chat_id refers to a thread
- `message_id` — ID of the triggering message (pin/reply/react)

Extends `BasePlatformAdapter.build_source()` to accept + forward them
and teaches `to_dict`/`from_dict` to serialize them.  Behaviourally a
no-op: nothing reads the fields yet and they default to None.

											
										
										
											2026-04-25 00:09:12 +05:30
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    @classmethod
 								    def from_dict(cls, data: Dict[str, Any]) -> "SessionSource":
 								        return cls(
 								            platform=Platform(data["platform"]),
 								            chat_id=str(data["chat_id"]),
 								            chat_name=data.get("chat_name"),
 								            chat_type=data.get("chat_type", "dm"),
 								            user_id=data.get("user_id"),
 								            user_name=data.get("user_name"),
 								            thread_id=data.get("thread_id"),
-												feat(gateway): include Discord channel topic in session context

Fixes #163

- Add chat_topic field to SessionSource dataclass
- Update to_dict/from_dict for serialization support
- Add chat_topic parameter to build_source helper
- Extract channel.topic in Discord adapter for messages and slash commands
- Display Channel Topic in system prompt when available
- Normalize empty topics to None

											
										
										
											2026-03-01 03:48:24 -05:00
+								            chat_topic=data.get("chat_topic"),
-												feat: add Signal messenger gateway platform (#405)

Complete Signal adapter using signal-cli daemon HTTP API.
Based on PR #268 by ibhagwan, rebuilt on current main with bug fixes.

Architecture:
- SSE streaming for inbound messages with exponential backoff (2s→60s)
- JSON-RPC 2.0 for outbound (send, typing, attachments, contacts)
- Health monitor detects stale SSE connections (120s threshold)
- Phone number redaction in all logs and global redact.py

Features:
- DM and group message support with separate access policies
- DM policies: pairing (default), allowlist, open
- Group policies: disabled (default), allowlist, open
- Attachment download with magic-byte type detection
- Typing indicators (8s refresh interval)
- 100MB attachment size limit, 8000 char message limit
- E.164 phone + UUID allowlist support

Integration:
- Platform.SIGNAL enum in gateway/config.py
- Signal in _is_user_authorized() allowlist maps (gateway/run.py)
- Adapter factory in _create_adapter() (gateway/run.py)
- user_id_alt/chat_id_alt fields in SessionSource for UUIDs
- send_message tool support via httpx JSON-RPC (not aiohttp)
- Interactive setup wizard in 'hermes gateway setup'
- Connectivity testing during setup (pings /api/v1/check)
- signal-cli detection and install guidance

Bug fixes from PR #268:
- Timestamp reads from envelope_data (not outer wrapper)
- Uses httpx consistently (not aiohttp in send_message tool)
- SIGNAL_DEBUG scoped to signal logger (not root)
- extract_images regex NOT modified (preserves group numbering)
- pairing.py NOT modified (no cross-platform side effects)
- No dual authorization (adapter defers to run.py for user auth)
- Wildcard uses set membership ('*' in set, not list equality)
- .zip default for PK magic bytes (not .docx)

No new Python dependencies — uses httpx (already core).
External requirement: signal-cli daemon (user-installed).

Tests: 30 new tests covering config, init, helpers, session source,
phone redaction, authorization, and send_message integration.

Co-authored-by: ibhagwan <ibhagwan@users.noreply.github.com>

											
										
										
											2026-03-08 20:20:35 -07:00
+								            user_id_alt=data.get("user_id_alt"),
 								            chat_id_alt=data.get("chat_id_alt"),
-												feat(session): add guild_id/parent_chat_id/message_id to SessionSource

Groundwork for injecting raw platform identifiers into the agent's
system prompt.  Currently only `thread_id` is exposed as a raw ID —
callers in a Discord thread had to guess `channel_id == thread_id`
(which happens to work because threads are channels in Discord's REST
API) and had no way to reference the parent channel, guild, or the
triggering message.

Adds three optional fields:

- `guild_id` — Discord guild / Slack workspace / Matrix server scope
- `parent_chat_id` — parent channel when chat_id refers to a thread
- `message_id` — ID of the triggering message (pin/reply/react)

Extends `BasePlatformAdapter.build_source()` to accept + forward them
and teaches `to_dict`/`from_dict` to serialize them.  Behaviourally a
no-op: nothing reads the fields yet and they default to None.

											
										
										
											2026-04-25 00:09:12 +05:30
+								            guild_id=data.get("guild_id"),
 								            parent_chat_id=data.get("parent_chat_id"),
 								            message_id=data.get("message_id"),
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        )
 								@dataclass
 								class SessionContext:
 								    """
 								    Full context for a session, used for dynamic system prompt injection.
 								    The agent receives this information to understand:
 								    - Where messages are coming from
 								    - What platforms are available
 								    - Where it can deliver scheduled task outputs
 								    """
 								    source: SessionSource
 								    connected_platforms: List[Platform]
 								    home_channels: Dict[Platform, HomeChannel]
-												fix(gateway): preserve sender attribution in shared group sessions

Generalize shared multi-user session handling so non-thread group sessions
(group_sessions_per_user=False) get the same treatment as shared threads:
inbound messages are prefixed with [sender name], and the session prompt
shows a multi-user note instead of pinning a single **User:** line into
the cached system prompt.

Before: build_session_key already treated these as shared sessions, but
_prepare_inbound_message_text and build_session_context_prompt only
recognized shared threads — creating cross-user attribution drift and
prompt-cache contamination in shared groups.

- Add is_shared_multi_user_session() helper alongside build_session_key()
  so both the session key and the multi-user branches are driven by the
  same rules (DMs never shared, threads shared unless
  thread_sessions_per_user, groups shared unless group_sessions_per_user).
- Add shared_multi_user_session field to SessionContext, populated by
  build_session_context() from config.
- Use context.shared_multi_user_session in the prompt builder (label is
  'Multi-user thread' when a thread is present, 'Multi-user session'
  otherwise).
- Use the helper in _prepare_inbound_message_text so non-thread shared
  groups also get [sender] prefixes.

Default behavior unchanged: DMs stay single-user, groups with
group_sessions_per_user=True still show the user normally, shared threads
keep their existing multi-user behavior.

Tests (65 passed):
- tests/gateway/test_session.py: new shared non-thread group prompt case.
- tests/gateway/test_shared_group_sender_prefix.py: inbound preprocessing
  for shared non-thread groups and default groups.

											
										
										
											2026-04-21 00:37:26 -07:00
+								    shared_multi_user_session: bool = False
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								    # Session metadata
 								    session_key: str = ""
 								    session_id: str = ""
 								    created_at: Optional[datetime] = None
 								    updated_at: Optional[datetime] = None
 								    def to_dict(self) -> Dict[str, Any]:
 								        return {
 								            "source": self.source.to_dict(),
 								            "connected_platforms": [p.value for p in self.connected_platforms],
 								            "home_channels": {
 								                p.value: hc.to_dict() for p, hc in self.home_channels.items()
 								            },
-												fix(gateway): preserve sender attribution in shared group sessions

Generalize shared multi-user session handling so non-thread group sessions
(group_sessions_per_user=False) get the same treatment as shared threads:
inbound messages are prefixed with [sender name], and the session prompt
shows a multi-user note instead of pinning a single **User:** line into
the cached system prompt.

Before: build_session_key already treated these as shared sessions, but
_prepare_inbound_message_text and build_session_context_prompt only
recognized shared threads — creating cross-user attribution drift and
prompt-cache contamination in shared groups.

- Add is_shared_multi_user_session() helper alongside build_session_key()
  so both the session key and the multi-user branches are driven by the
  same rules (DMs never shared, threads shared unless
  thread_sessions_per_user, groups shared unless group_sessions_per_user).
- Add shared_multi_user_session field to SessionContext, populated by
  build_session_context() from config.
- Use context.shared_multi_user_session in the prompt builder (label is
  'Multi-user thread' when a thread is present, 'Multi-user session'
  otherwise).
- Use the helper in _prepare_inbound_message_text so non-thread shared
  groups also get [sender] prefixes.

Default behavior unchanged: DMs stay single-user, groups with
group_sessions_per_user=True still show the user normally, shared threads
keep their existing multi-user behavior.

Tests (65 passed):
- tests/gateway/test_session.py: new shared non-thread group prompt case.
- tests/gateway/test_shared_group_sender_prefix.py: inbound preprocessing
  for shared non-thread groups and default groups.

											
										
										
											2026-04-21 00:37:26 -07:00
+								            "shared_multi_user_session": self.shared_multi_user_session,
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            "session_key": self.session_key,
 								            "session_id": self.session_id,
 								            "created_at": self.created_at.isoformat() if self.created_at else None,
 								            "updated_at": self.updated_at.isoformat() if self.updated_at else None,
 								        }
-												fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs)

Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM
needs the real ID to tag users. Redaction now only applies to WhatsApp,
Signal, and Telegram where IDs are pure routing metadata.

Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.

											
										
										
											2026-03-16 05:58:34 -07:00
+								_PII_SAFE_PLATFORMS = frozenset({
 								    Platform.WHATSAPP,
 								    Platform.SIGNAL,
 								    Platform.TELEGRAM,
-												fix(bluebubbles): add missing integration points and documentation (#6460)

- hermes_cli/skills_config.py: add platform label for per-platform skill config
- gateway/session.py: add to PII-safe platforms (no mention system)
- website/docs/user-guide/messaging/bluebubbles.md: full setup guide
- website/sidebars.ts: sidebar navigation entry
- 10 docs pages: add BlueBubbles to all platform enumerations
  (env vars, toolsets, cron delivery, gateway internals, etc.)
											
										
										
											2026-04-09 00:19:05 -07:00
+								    Platform.BLUEBUBBLES,
-												fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs)

Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM
needs the real ID to tag users. Redaction now only applies to WhatsApp,
Signal, and Telegram where IDs are pure routing metadata.

Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.

											
										
										
											2026-03-16 05:58:34 -07:00
+								})
 								"""Platforms where user IDs can be safely redacted (no in-message mention system
 								that requires raw IDs).  Discord is excluded because mentions use ``<@user_id>``
 								and the LLM needs the real ID to tag users."""
-												feat(tools): make discord/discord_admin opt-in, Discord-only

Both discord (read/participate) and discord_admin (server admin) are now
configurable via `hermes tools` with default-OFF. Previously the core
discord tool (fetch_messages, search_members, create_thread) auto-loaded
on every Discord install with DISCORD_BOT_TOKEN set — 19 tools the user
never opted into.

Adds a platform-scoping mechanism (_TOOLSET_PLATFORM_RESTRICTIONS) so
the discord toolsets only show up in the Discord platform's checklist,
not on CLI/Telegram/Slack/etc. Applied at four gates:
  - _prompt_toolset_checklist: checklist filter
  - _get_platform_tools: resolution filter (both branches)
  - _save_platform_tools: save-time filter (covers 'Configure all
    platforms' and hand-edited config.yaml)
  - tools_disable_enable_command: rejects `hermes tools enable discord`
    on non-Discord platforms with a clear error

build_session_context_prompt now injects the Discord IDs block only
when both conditions hold: the discord/discord_admin toolset is
enabled AND DISCORD_BOT_TOKEN is set. Toolset alone isn't enough —
the tool's check_fn gates on the token at registry time, so opting
in without a token yields no tools and the IDs block would lie.
Otherwise keep the stale-API disclaimer.

											
										
										
											2026-04-25 03:48:03 -07:00
+								def _discord_tools_loaded() -> bool:
 								    """True iff the agent will actually have Discord tools this session.
 								    Two conditions must hold:
 . The `discord` or `discord_admin` toolset is enabled for the
 								         Discord platform via `hermes tools` (opt-in, default OFF).
 . `DISCORD_BOT_TOKEN` is set — the tool's `check_fn` gates on it
 								         at registry time, so the toolset being enabled in config is not
 								         enough if the token isn't configured.
 								    Returns False (safe default — keeps the stale-API disclaimer) on any
 								    error so a bad config can't silently promise tools the agent lacks.
 								    """
 								    if not (os.environ.get("DISCORD_BOT_TOKEN") or "").strip():
 								        return False
 								    try:
 								        from hermes_cli.config import load_config
 								        from hermes_cli.tools_config import _get_platform_tools
 								        cfg = load_config()
 								        enabled = _get_platform_tools(cfg, "discord", include_default_mcp_servers=False)
 								        return "discord" in enabled or "discord_admin" in enabled
 								    except Exception:
 								        return False
-												feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

											
										
										
											2026-03-16 05:48:45 -07:00
+								def build_session_context_prompt(
 								    context: SessionContext,
 								    *,
 								    redact_pii: bool = False,
 								) -> str:
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    """
 								    Build the dynamic system prompt section that tells the agent about its context.
 								    This is injected into the system prompt so the agent knows:
 								    - Where messages are coming from
 								    - What platforms are connected
 								    - Where it can deliver scheduled task outputs
-												feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

											
										
										
											2026-03-16 05:48:45 -07:00
-												fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs)

Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM
needs the real ID to tag users. Redaction now only applies to WhatsApp,
Signal, and Telegram where IDs are pure routing metadata.

Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.

											
										
										
											2026-03-16 05:58:34 -07:00
+								    When *redact_pii* is True **and** the source platform is in
 								    ``_PII_SAFE_PLATFORMS``, phone numbers are stripped and user/chat IDs
-												feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

											
										
										
											2026-03-16 05:48:45 -07:00
+								    are replaced with deterministic hashes before being sent to the LLM.
-												fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs)

Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM
needs the real ID to tag users. Redaction now only applies to WhatsApp,
Signal, and Telegram where IDs are pure routing metadata.

Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.

											
										
										
											2026-03-16 05:58:34 -07:00
+								    Platforms like Discord are excluded because mentions need real IDs.
-												feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

											
										
										
											2026-03-16 05:48:45 -07:00
+								    Routing still uses the original values (they stay in SessionSource).
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    """
-												fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs)

Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM
needs the real ID to tag users. Redaction now only applies to WhatsApp,
Signal, and Telegram where IDs are pure routing metadata.

Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.

											
										
										
											2026-03-16 05:58:34 -07:00
+								    # Only apply redaction on platforms where IDs aren't needed for mentions
 								    redact_pii = redact_pii and context.source.platform in _PII_SAFE_PLATFORMS
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    lines = [
 								        "## Current Session Context",
 								        "",
 								    ]
 								    # Source info
 								    platform_name = context.source.platform.value.title()
 								    if context.source.platform == Platform.LOCAL:
 								        lines.append(f"**Source:** {platform_name} (the machine running this agent)")
 								    else:
-												feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

											
										
										
											2026-03-16 05:48:45 -07:00
+								        # Build a description that respects PII redaction
 								        src = context.source
 								        if redact_pii:
 								            # Build a safe description without raw IDs
 								            _uname = src.user_name or (
 								                _hash_sender_id(src.user_id) if src.user_id else "user"
 								            )
 								            _cname = src.chat_name or _hash_chat_id(src.chat_id)
 								            if src.chat_type == "dm":
 								                desc = f"DM with {_uname}"
 								            elif src.chat_type == "group":
 								                desc = f"group: {_cname}"
 								            elif src.chat_type == "channel":
 								                desc = f"channel: {_cname}"
 								            else:
 								                desc = _cname
 								        else:
 								            desc = src.description
 								        lines.append(f"**Source:** {platform_name} ({desc})")
-												feat(gateway): include Discord channel topic in session context

Fixes #163

- Add chat_topic field to SessionSource dataclass
- Update to_dict/from_dict for serialization support
- Add chat_topic parameter to build_source helper
- Extract channel.topic in Discord adapter for messages and slash commands
- Display Channel Topic in system prompt when available
- Normalize empty topics to None

											
										
										
											2026-03-01 03:48:24 -05:00
 								    # Channel topic (if available - provides context about the channel's purpose)
 								    if context.source.chat_topic:
 								        lines.append(f"**Channel Topic:** {context.source.chat_topic}")
-												fix(whatsapp): per-contact DM session isolation and user identity in context

											
										
										
											2026-02-26 12:44:09 -03:00
-												feat: shared thread sessions by default — multi-user thread support (#5391)

Threads (Telegram forum topics, Discord threads, Slack threads) now default
to shared sessions where all participants see the same conversation. This is
the expected UX for threaded conversations where multiple users @mention the
bot and interact collaboratively.

Changes:
- build_session_key(): when thread_id is present, user_id is no longer
  appended to the session key (threads are shared by default)
- New config: thread_sessions_per_user (default: false) — opt-in to restore
  per-user isolation in threads if needed
- Sender attribution: messages in shared threads are prefixed with
  [sender name] so the agent can tell participants apart
- System prompt: shared threads show 'Multi-user thread' note instead of
  a per-turn User line (avoids busting prompt cache)
- Wired through all callers: gateway/run.py, base.py, telegram.py, feishu.py
- Regular group messages (no thread) remain per-user isolated (unchanged)
- DM threads are unaffected (they have their own keying logic)

Closes community request from demontut_ re: thread-based shared sessions.
											
										
										
											2026-04-05 19:46:58 -07:00
+								    # User identity.
-												fix(gateway): preserve sender attribution in shared group sessions

Generalize shared multi-user session handling so non-thread group sessions
(group_sessions_per_user=False) get the same treatment as shared threads:
inbound messages are prefixed with [sender name], and the session prompt
shows a multi-user note instead of pinning a single **User:** line into
the cached system prompt.

Before: build_session_key already treated these as shared sessions, but
_prepare_inbound_message_text and build_session_context_prompt only
recognized shared threads — creating cross-user attribution drift and
prompt-cache contamination in shared groups.

- Add is_shared_multi_user_session() helper alongside build_session_key()
  so both the session key and the multi-user branches are driven by the
  same rules (DMs never shared, threads shared unless
  thread_sessions_per_user, groups shared unless group_sessions_per_user).
- Add shared_multi_user_session field to SessionContext, populated by
  build_session_context() from config.
- Use context.shared_multi_user_session in the prompt builder (label is
  'Multi-user thread' when a thread is present, 'Multi-user session'
  otherwise).
- Use the helper in _prepare_inbound_message_text so non-thread shared
  groups also get [sender] prefixes.

Default behavior unchanged: DMs stay single-user, groups with
group_sessions_per_user=True still show the user normally, shared threads
keep their existing multi-user behavior.

Tests (65 passed):
- tests/gateway/test_session.py: new shared non-thread group prompt case.
- tests/gateway/test_shared_group_sender_prefix.py: inbound preprocessing
  for shared non-thread groups and default groups.

											
										
										
											2026-04-21 00:37:26 -07:00
+								    # In shared multi-user sessions (shared threads OR shared non-thread groups
 								    # when group_sessions_per_user=False), multiple users contribute to the same
 								    # conversation.  Don't pin a single user name in the system prompt — it
 								    # changes per-turn and would bust the prompt cache.  Instead, note that
 								    # this is a multi-user session; individual sender names are prefixed on
 								    # each user message by the gateway.
 								    if context.shared_multi_user_session:
 								        session_label = "Multi-user thread" if context.source.thread_id else "Multi-user session"
-												feat: shared thread sessions by default — multi-user thread support (#5391)

Threads (Telegram forum topics, Discord threads, Slack threads) now default
to shared sessions where all participants see the same conversation. This is
the expected UX for threaded conversations where multiple users @mention the
bot and interact collaboratively.

Changes:
- build_session_key(): when thread_id is present, user_id is no longer
  appended to the session key (threads are shared by default)
- New config: thread_sessions_per_user (default: false) — opt-in to restore
  per-user isolation in threads if needed
- Sender attribution: messages in shared threads are prefixed with
  [sender name] so the agent can tell participants apart
- System prompt: shared threads show 'Multi-user thread' note instead of
  a per-turn User line (avoids busting prompt cache)
- Wired through all callers: gateway/run.py, base.py, telegram.py, feishu.py
- Regular group messages (no thread) remain per-user isolated (unchanged)
- DM threads are unaffected (they have their own keying logic)

Closes community request from demontut_ re: thread-based shared sessions.
											
										
										
											2026-04-05 19:46:58 -07:00
+								        lines.append(
-												fix(gateway): preserve sender attribution in shared group sessions

Generalize shared multi-user session handling so non-thread group sessions
(group_sessions_per_user=False) get the same treatment as shared threads:
inbound messages are prefixed with [sender name], and the session prompt
shows a multi-user note instead of pinning a single **User:** line into
the cached system prompt.

Before: build_session_key already treated these as shared sessions, but
_prepare_inbound_message_text and build_session_context_prompt only
recognized shared threads — creating cross-user attribution drift and
prompt-cache contamination in shared groups.

- Add is_shared_multi_user_session() helper alongside build_session_key()
  so both the session key and the multi-user branches are driven by the
  same rules (DMs never shared, threads shared unless
  thread_sessions_per_user, groups shared unless group_sessions_per_user).
- Add shared_multi_user_session field to SessionContext, populated by
  build_session_context() from config.
- Use context.shared_multi_user_session in the prompt builder (label is
  'Multi-user thread' when a thread is present, 'Multi-user session'
  otherwise).
- Use the helper in _prepare_inbound_message_text so non-thread shared
  groups also get [sender] prefixes.

Default behavior unchanged: DMs stay single-user, groups with
group_sessions_per_user=True still show the user normally, shared threads
keep their existing multi-user behavior.

Tests (65 passed):
- tests/gateway/test_session.py: new shared non-thread group prompt case.
- tests/gateway/test_shared_group_sender_prefix.py: inbound preprocessing
  for shared non-thread groups and default groups.

											
										
										
											2026-04-21 00:37:26 -07:00
+								            f"**Session type:** {session_label} — messages are prefixed "
-												feat: shared thread sessions by default — multi-user thread support (#5391)

Threads (Telegram forum topics, Discord threads, Slack threads) now default
to shared sessions where all participants see the same conversation. This is
the expected UX for threaded conversations where multiple users @mention the
bot and interact collaboratively.

Changes:
- build_session_key(): when thread_id is present, user_id is no longer
  appended to the session key (threads are shared by default)
- New config: thread_sessions_per_user (default: false) — opt-in to restore
  per-user isolation in threads if needed
- Sender attribution: messages in shared threads are prefixed with
  [sender name] so the agent can tell participants apart
- System prompt: shared threads show 'Multi-user thread' note instead of
  a per-turn User line (avoids busting prompt cache)
- Wired through all callers: gateway/run.py, base.py, telegram.py, feishu.py
- Regular group messages (no thread) remain per-user isolated (unchanged)
- DM threads are unaffected (they have their own keying logic)

Closes community request from demontut_ re: thread-based shared sessions.
											
										
										
											2026-04-05 19:46:58 -07:00
+								            "with [sender name]. Multiple users may participate."
 								        )
 								    elif context.source.user_name:
-												fix(whatsapp): per-contact DM session isolation and user identity in context

											
										
										
											2026-02-26 12:44:09 -03:00
+								        lines.append(f"**User:** {context.source.user_name}")
 								    elif context.source.user_id:
-												feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

											
										
										
											2026-03-16 05:48:45 -07:00
+								        uid = context.source.user_id
 								        if redact_pii:
 								            uid = _hash_sender_id(uid)
 								        lines.append(f"**User ID:** {uid}")
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												fix(gateway): add platform-specific notes to session context prompt (#1184)

Tell the agent what it CANNOT do on Slack and Discord — no searching
channel history, no pinning messages, no managing channels/roles.
Prevents the agent from hallucinating capabilities it doesn't have
and promising actions it can't deliver.

Addresses user feedback: agent says 'I'll search your Slack history'
then goes silent because no Slack-specific tools exist.
											
										
										
											2026-03-13 12:34:11 -07:00
+								    # Platform-specific behavioral notes
 								    if context.source.platform == Platform.SLACK:
 								        lines.append("")
 								        lines.append(
 								            "**Platform notes:** You are running inside Slack. "
 								            "You do NOT have access to Slack-specific APIs — you cannot search "
 								            "channel history, pin/unpin messages, manage channels, or list users. "
-												fix(slack): extract rich_text quotes/lists and link unfurl previews

Slack's modern composer sends messages with a 'blocks' array that
contains rich_text elements. When a user forwards or quotes another
message, the quoted content shows up in the rich_text_quote children
of that array — and is NOT included in the plain 'text' field. The
agent saw only the lossy plain text and was blind to forwarded /
quoted content. Same story for link unfurl previews (Notion, docs,
GitHub, etc.) which Slack puts in the 'attachments' array.

Two fixes in the inbound handler:

1. _extract_text_from_slack_blocks walks rich_text / rich_text_quote /
   rich_text_list / rich_text_preformatted trees and renders readable
   text ('> quoted', '• bullet', code fences), dedupes against the
   plain text field, and appends the extracted content so the agent
   sees everything.

2. Link unfurl / attachment preview extraction reads title, url,
   body, and footer from the 'attachments' array and appends a
   '📎 [title](url)\n   body\n   _footer_' section per preview.
   Skips is_msg_unfurl to avoid echoing our own Slack replies back.

Routing is careful not to trust augmented text: mention gating
(is_mentioned) and slash-command detection both run against the
original 'text' field, so forwarded content containing '<@bot>' or
'/deploy' in a quote can't trick the bot into responding in a
channel it shouldn't or classifying a normal message as a command.

Adjustment from original PR: dropped _serialize_slack_blocks_for_agent,
which inlined a redacted JSON dump of non-rich_text blocks (section,
accessory, actions, etc.) — the agent would see the raw Block Kit
structure for UI-heavy alerts. It added up to 6000 characters to the
prompt context on every qualifying message with no opt-out. The
rich_text extraction and attachment unfurls cover the common bug-fix
case (quoted/forwarded content + link previews) without the prefill
tax. If a user needs block inspection later, it can return as a
config opt-in.

Also updates the Slack platform notes in session.py to accurately
describe what the gateway inlines.

											
										
										
											2026-04-26 13:02:27 -07:00
+								            "Do not promise to perform these actions. The gateway may inline the "
 								            "current message's Slack block/attachment payload when available, but "
 								            "you still cannot call Slack APIs yourself."
-												fix(gateway): add platform-specific notes to session context prompt (#1184)

Tell the agent what it CANNOT do on Slack and Discord — no searching
channel history, no pinning messages, no managing channels/roles.
Prevents the agent from hallucinating capabilities it doesn't have
and promising actions it can't deliver.

Addresses user feedback: agent says 'I'll search your Slack history'
then goes silent because no Slack-specific tools exist.
											
										
										
											2026-03-13 12:34:11 -07:00
+								        )
 								    elif context.source.platform == Platform.DISCORD:
-												feat(tools): make discord/discord_admin opt-in, Discord-only

Both discord (read/participate) and discord_admin (server admin) are now
configurable via `hermes tools` with default-OFF. Previously the core
discord tool (fetch_messages, search_members, create_thread) auto-loaded
on every Discord install with DISCORD_BOT_TOKEN set — 19 tools the user
never opted into.

Adds a platform-scoping mechanism (_TOOLSET_PLATFORM_RESTRICTIONS) so
the discord toolsets only show up in the Discord platform's checklist,
not on CLI/Telegram/Slack/etc. Applied at four gates:
  - _prompt_toolset_checklist: checklist filter
  - _get_platform_tools: resolution filter (both branches)
  - _save_platform_tools: save-time filter (covers 'Configure all
    platforms' and hand-edited config.yaml)
  - tools_disable_enable_command: rejects `hermes tools enable discord`
    on non-Discord platforms with a clear error

build_session_context_prompt now injects the Discord IDs block only
when both conditions hold: the discord/discord_admin toolset is
enabled AND DISCORD_BOT_TOKEN is set. Toolset alone isn't enough —
the tool's check_fn gates on the token at registry time, so opting
in without a token yields no tools and the IDs block would lie.
Otherwise keep the stale-API disclaimer.

											
										
										
											2026-04-25 03:48:03 -07:00
+								        # Inject the Discord IDs block only when the agent actually has
 								        # Discord tools loaded this session — i.e. the user opted into
 								        # `discord` / `discord_admin` via `hermes tools` AND the bot
 								        # token is configured.  Otherwise keep the stale-API disclaimer
 								        # honest so we never promise tools the agent lacks.
 								        if _discord_tools_loaded():
-												feat(session): inject Discord IDs block when discord tool is loaded

When DISCORD_BOT_TOKEN is set — meaning the discord tool actually
loads — emit a dedicated IDs block in the session context prompt so
the agent can call ``fetch_messages``, ``pin_message``, etc. with
real identifiers instead of probing.

Currently only ``thread_id`` was exposed as a raw ID (via the
``description`` string).  The agent in a Discord thread had to guess
that the thread ID doubles as a channel ID for the REST API (it
does), and it had no way to reference the parent channel, the guild,
or the triggering message at all.

The block adapts to context:

  - Thread:     guild / parent channel / thread / message
  - Channel:    guild / channel / message
  - (DM has no guild/channel IDs worth listing; only message)

Discord isn't in _PII_SAFE_PLATFORMS, so IDs ship unredacted.

											
										
										
											2026-04-25 00:11:26 +05:30
+								            src = context.source
 								            id_lines = ["", "**Discord IDs (for the `discord` / `discord_admin` tools):**"]
 								            if src.guild_id:
 								                id_lines.append(f"  - Guild: `{src.guild_id}`")
 								            if src.thread_id and src.parent_chat_id:
 								                id_lines.append(f"  - Parent channel: `{src.parent_chat_id}`")
 								                id_lines.append(f"  - Thread: `{src.thread_id}` (use as `channel_id` for fetch_messages etc.)")
 								            else:
 								                id_lines.append(f"  - Channel: `{src.chat_id}`")
 								            if src.message_id:
 								                id_lines.append(f"  - Triggering message: `{src.message_id}`")
 								            lines.extend(id_lines)
 								        else:
-												fix(session): gate stale "no Discord APIs" note on DISCORD_BOT_TOKEN

The Discord platform note in the session context prompt claimed the
agent has no server-management APIs — pre-dating the discord tool.
With a bot token configured the agent actually has fetch_messages,
search_members, create_thread, and optionally the discord_admin tool;
telling the model otherwise causes it to refuse or apologise for
calls it is fully able to make.

Gate the disclaimer on DISCORD_BOT_TOKEN being unset, matching the
tool's own ``check_fn``.  Without a token the note still appears and
remains accurate; with a token the model is no longer gaslit into
refusing valid tool calls.

											
										
										
											2026-04-25 00:10:32 +05:30
+								            lines.append("")
 								            lines.append(
 								                "**Platform notes:** You are running inside Discord. "
 								                "You do NOT have access to Discord-specific APIs — you cannot search "
 								                "channel history, pin messages, manage roles, or list server members. "
 								                "Do not promise to perform these actions. If the user asks, explain "
 								                "that you can only read messages sent directly to you and respond."
 								            )
-												fix(gateway/bluebubbles): align iMessage delivery with non-editable UX

											
										
										
											2026-04-22 09:25:10 -04:00
+								    elif context.source.platform == Platform.BLUEBUBBLES:
 								        lines.append("")
 								        lines.append(
 								            "**Platform notes:** You are responding via iMessage. "
 								            "Keep responses short and conversational — think texts, not essays. "
 								            "Structure longer replies as separate short thoughts, each separated "
 								            "by a blank line (double newline). Each block between blank lines "
 								            "will be delivered as its own iMessage bubble, so write accordingly: "
 								            "one idea per bubble, 1–3 sentences each. "
 								            "If the user needs a detailed answer, give the short version first "
 								            "and offer to elaborate."
 								        )
-												yuanbao platform (#16298)

Co-authored-by: loongzhao <loongzhao@tencent.com>
											
										
										
											2026-04-26 18:50:49 -07:00
+								    elif context.source.platform == Platform.YUANBAO:
 								        lines.append("")
 								        lines.append(
 								            "**Platform notes:** You are running inside Yuanbao. "
 								            "You CAN send private (DM) messages via the send_message tool. "
 								            "Use target='yuanbao:direct:<account_id>' for DM "
 								            "and target='yuanbao:group:<group_code>' for group chat."
 								        )
-												fix(gateway): add platform-specific notes to session context prompt (#1184)

Tell the agent what it CANNOT do on Slack and Discord — no searching
channel history, no pinning messages, no managing channels/roles.
Prevents the agent from hallucinating capabilities it doesn't have
and promising actions it can't deliver.

Addresses user feedback: agent says 'I'll search your Slack history'
then goes silent because no Slack-specific tools exist.
											
										
										
											2026-03-13 12:34:11 -07:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    # Connected platforms
 								    platforms_list = ["local (files on this machine)"]
 								    for p in context.connected_platforms:
 								        if p != Platform.LOCAL:
 								            platforms_list.append(f"{p.value}: Connected ✓")
 								    lines.append(f"**Connected Platforms:** {', '.join(platforms_list)}")
 								    # Home channels
 								    if context.home_channels:
 								        lines.append("")
 								        lines.append("**Home Channels (default destinations):**")
 								        for platform, home in context.home_channels.items():
-												feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

											
										
										
											2026-03-16 05:48:45 -07:00
+								            hc_id = _hash_chat_id(home.chat_id) if redact_pii else home.chat_id
 								            lines.append(f"  - {platform.value}: {home.name} (ID: {hc_id})")
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								    # Delivery options for scheduled tasks
 								    lines.append("")
 								    lines.append("**Delivery options for scheduled tasks:**")
-												fix(gateway): use profile-aware Hermes paths in runtime hints

											
										
										
											2026-04-15 22:27:36 +03:00
+								    from hermes_constants import display_hermes_home
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    # Origin delivery
 								    if context.source.platform == Platform.LOCAL:
 								        lines.append("- `\"origin\"` → Local output (saved to files)")
 								    else:
-												feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

											
										
										
											2026-03-16 05:48:45 -07:00
+								        _origin_label = context.source.chat_name or (
 								            _hash_chat_id(context.source.chat_id) if redact_pii else context.source.chat_id
 								        )
 								        lines.append(f"- `\"origin\"` → Back to this chat ({_origin_label})")
-												fix(gateway): use profile-aware Hermes paths in runtime hints

											
										
										
											2026-04-15 22:27:36 +03:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    # Local always available
-												fix(gateway): use profile-aware Hermes paths in runtime hints

											
										
										
											2026-04-15 22:27:36 +03:00
+								    lines.append(
 								        f"- `\"local\"` → Save to local files only ({display_hermes_home()}/cron/output/)"
 								    )
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								    # Platform home channels
 								    for platform, home in context.home_channels.items():
 								        lines.append(f"- `\"{platform.value}\"` → Home channel ({home.name})")
 								    # Note about explicit targeting
 								    lines.append("")
 								    lines.append("*For explicit targeting, use `\"platform:chat_id\"` format if the user provides a specific chat ID.*")
 								    return "\n".join(lines)
 								@dataclass
 								class SessionEntry:
 								    """
 								    Entry in the session store.
 								    Maps a session key to its current session ID and metadata.
 								    """
 								    session_key: str
 								    session_id: str
 								    created_at: datetime
 								    updated_at: datetime
 								    # Origin metadata for delivery routing
 								    origin: Optional[SessionSource] = None
 								    # Display metadata
 								    display_name: Optional[str] = None
 								    platform: Optional[Platform] = None
 								    chat_type: str = "dm"
 								    # Token tracking
 								    input_tokens: int = 0
 								    output_tokens: int = 0
-												feat: add route-aware pricing estimates (#1695)

Salvaged from PR #1563 by @kshitijk4poor. Cherry-picked with authorship preserved.

- Route-aware pricing architecture replacing static MODEL_PRICING + heuristics
- Canonical usage normalization (Anthropic/OpenAI/Codex API shapes)
- Cache-aware billing (separate cache_read/cache_write rates)
- Cost status tracking (estimated/included/unknown/actual)
- OpenRouter live pricing via models API
- Schema migration v4→v5 with billing metadata columns
- Removed speculative forward-looking entries
- Removed cost display from CLI status bar
- Threaded OpenRouter metadata pre-warm

Co-authored-by: kshitij <82637225+kshitijk4poor@users.noreply.github.com>
											
										
										
											2026-03-17 03:44:44 -07:00
+								    cache_read_tokens: int = 0
 								    cache_write_tokens: int = 0
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    total_tokens: int = 0
-												feat: add route-aware pricing estimates (#1695)

Salvaged from PR #1563 by @kshitijk4poor. Cherry-picked with authorship preserved.

- Route-aware pricing architecture replacing static MODEL_PRICING + heuristics
- Canonical usage normalization (Anthropic/OpenAI/Codex API shapes)
- Cache-aware billing (separate cache_read/cache_write rates)
- Cost status tracking (estimated/included/unknown/actual)
- OpenRouter live pricing via models API
- Schema migration v4→v5 with billing metadata columns
- Removed speculative forward-looking entries
- Removed cost display from CLI status bar
- Threaded OpenRouter metadata pre-warm

Co-authored-by: kshitij <82637225+kshitijk4poor@users.noreply.github.com>
											
										
										
											2026-03-17 03:44:44 -07:00
+								    estimated_cost_usd: float = 0.0
 								    cost_status: str = "unknown"
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												fix: use actual API token counts for gateway compression pre-check

Root cause of aggressive gateway compression vs CLI:
- CLI: single AIAgent persists across conversation, uses real API-reported
  prompt_tokens for compression decisions — accurate
- Gateway: each message creates fresh AIAgent, token count discarded after,
  next message pre-check falls back to rough str(msg)//4 estimate which
  overestimates 30-50% on tool-heavy conversations

Fix:
- Add last_prompt_tokens field to SessionEntry — stores the actual
  API-reported prompt token count from the most recent agent turn
- After run_conversation(), extract context_compressor.last_prompt_tokens
  and persist it via update_session()
- Gateway pre-check now uses stored actual tokens when available (exact
  same accuracy as CLI), falling back to rough estimate with 1.4x safety
  factor only for the first message of a session

This makes gateway compression behave identically to CLI compression
for all turns after the first. Reported by TigerHix.

											
										
										
											2026-03-10 23:28:18 -07:00
+								    # Last API-reported prompt tokens (for accurate compression pre-check)
 								    last_prompt_tokens: int = 0
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								    # Set when a session was created because the previous one expired;
 								    # consumed once by the message handler to inject a notice into context
 								    was_auto_reset: bool = False
-												feat(gateway): notify users when session auto-resets (#2519)

When a session expires (daily schedule or idle timeout) and is
automatically reset, send a notification to the user explaining
what happened:

  ◐ Session automatically reset (inactive for 24h).
    Conversation history cleared.
  Use /resume to browse and restore a previous session.
  Adjust reset timing in config.yaml under session_reset.

Notifications are suppressed when:
- The expired session had no activity (no tokens used)
- The platform is excluded (api_server, webhook by default)
- notify: false in config

Changes:
- session.py: _should_reset() returns reason string ('idle'/'daily')
  instead of bool; SessionEntry gains auto_reset_reason and
  reset_had_activity fields; old entry's total_tokens checked
- config.py: SessionResetPolicy gains notify (bool, default: true)
  and notify_exclude_platforms (default: api_server, webhook)
- run.py: sends notification via adapter.send() before processing
  the user's message, with activity + platform checks
- 13 new tests

Config (config.yaml):

  session_reset:
    notify: true
    notify_exclude_platforms: [api_server, webhook]
											
										
										
											2026-03-22 09:33:39 -07:00
+								    auto_reset_reason: Optional[str] = None  # "idle" or "daily"
 								    reset_had_activity: bool = False  # whether the expired session had any messages
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
-												refactor(memory): remove flush_memories entirely (#15696)

The AIAgent.flush_memories pre-compression save, the gateway
_flush_memories_for_session, and everything feeding them are
obsolete now that the background memory/skill review handles
persistent memory extraction.

Problems with flush_memories:

- Pre-dates the background review loop.  It was the only memory-save
  path when introduced; the background review now fires every 10 user
  turns on CLI and gateway alike, which is far more frequent than
  compression or session reset ever triggered flush.
- Blocking and synchronous.  Pre-compression flush ran on the live agent
  before compression, blocking the user-visible response.
- Cache-breaking.  Flush built a temporary conversation prefix
  (system prompt + memory-only tool list) that diverged from the live
  conversation's cached prefix, invalidating prompt caching.  The
  gateway variant spawned a fresh AIAgent with its own clean prompt
  for each finalized session — still cache-breaking, just in a
  different process.
- Redundant.  Background review runs in the live conversation's
  session context, gets the same content, writes to the same memory
  store, and doesn't break the cache.  Everything flush_memories
  claimed to preserve is already covered.

What this removes:

- AIAgent.flush_memories() method (~248 LOC in run_agent.py)
- Pre-compression flush call in _compress_context
- flush_memories call sites in cli.py (/new + exit)
- GatewayRunner._flush_memories_for_session + _async_flush_memories
  (and the 3 call sites: session expiry watcher, /new, /resume)
- 'flush_memories' entry from DEFAULT_CONFIG auxiliary tasks,
  hermes tools UI task list, auxiliary_client docstrings
- _memory_flush_min_turns config + init
- #15631's headroom-deduction math in
  _check_compression_model_feasibility (headroom was only needed
  because flush dragged the full main-agent system prompt along;
  the compression summariser sends a single user-role prompt so
  new_threshold = aux_context is safe again)
- The dedicated test files and assertions that exercised
  flush-specific paths

What this renames (with read-time backcompat on sessions.json):

- SessionEntry.memory_flushed -> SessionEntry.expiry_finalized.
  The session-expiry watcher still uses the flag to avoid re-running
  finalize/eviction on the same expired session; the new name
  reflects what it now actually gates.  from_dict() reads
  'expiry_finalized' first, falls back to the legacy 'memory_flushed'
  key so existing sessions.json files upgrade seamlessly.

Supersedes #15631 and #15638.

Tested: 383 targeted tests pass across run_agent/, agent/, cli/,
and gateway/ session-boundary suites.  No behavior regressions —
background memory review continues to handle persistent memory
extraction on both CLI and gateway.
											
										
										
											2026-04-25 08:21:14 -07:00
+								    # Set by the background expiry watcher after it finalizes an expired
 								    # session (invoking on_session_finalize hooks and evicting the cached
 								    # agent).  Persisted to sessions.json so the flag survives gateway
 								    # restarts — prevents redundant finalization runs.
 								    expiry_finalized: bool = False
-												fix(gateway): break stuck session resume loops on restart (#7536)

Cherry-picked from PR #7747 with follow-up fixes:
- Narrowed suspend_all_active() to suspend_recently_active() — only
  suspends sessions updated within the last 2 minutes (likely in-flight),
  not all sessions which would unnecessarily reset idle users
- /stop with no running agent no longer suspends the session; only
  actual force-stops mark the session for reset

											
										
										
											2026-04-11 11:59:00 -07:00
 								    # When True the next call to get_or_create_session() will auto-reset
 								    # this session (create a new session_id) so the user starts fresh.
 								    # Set by /stop to break stuck-resume loops (#7536).
 								    suspended: bool = False
-												fix(gateway): auto-resume sessions after drain-timeout restart (#11852) (#12301)

The shutdown banner promised "send any message after restart to resume
where you left off" but the code did the opposite: a drain-timeout
restart skipped the .clean_shutdown marker, which made the next startup
call suspend_recently_active(), which marked the session suspended,
which made get_or_create_session() spawn a fresh session_id with a
'Session automatically reset. Use /resume...' notice — contradicting
the banner.

Introduce a resume_pending state on SessionEntry that is distinct from
suspended. Drain-timeout shutdown flags active sessions resume_pending
instead of letting startup-wide suspension destroy them. The next
message on the same session_key preserves the session_id, reloads the
transcript, and the agent receives a reason-aware restart-resume
system note that subsumes the existing tool-tail auto-continue note
(PR #9934).

Terminal escalation still flows through the existing
.restart_failure_counts stuck-loop counter (PR #7536, threshold 3) —
no parallel counter on SessionEntry. suspended still wins over
resume_pending in get_or_create_session() so genuinely stuck sessions
converge to a clean slate.

Spec: PR #11852 (BrennerSpear). Implementation follows the spec with
the approved correction (reuse .restart_failure_counts rather than
adding a resume_attempts field).

Changes:
- gateway/session.py: SessionEntry.resume_pending/resume_reason/
  last_resume_marked_at + to_dict/from_dict; SessionStore
  .mark_resume_pending()/clear_resume_pending(); get_or_create_session()
  returns existing entry when resume_pending (suspended still wins);
  suspend_recently_active() skips resume_pending entries.
- gateway/run.py: _stop_impl() drain-timeout branch marks active
  sessions resume_pending before _interrupt_running_agents();
  _run_agent() injects reason-aware restart-resume system note that
  subsumes the tool-tail case; successful-turn cleanup also clears
  resume_pending next to _clear_restart_failure_count();
  _notify_active_sessions_of_shutdown() softens the restart banner to
  'I'll try to resume where you left off' (honest about stuck-loop
  escalation).
- tests/gateway/test_restart_resume_pending.py: 29 new tests covering
  SessionEntry roundtrip, mark/clear helpers, get_or_create_session
  precedence (suspended > resume_pending), suspend_recently_active
  skip, drain-timeout mark reason (restart vs shutdown), system-note
  injection decision tree (including tool-tail subsumption), banner
  wording, and stuck-loop escalation override.
											
										
										
											2026-04-18 17:32:17 -07:00
 								    # When True the session was interrupted by a gateway restart/shutdown
 								    # drain timeout, but recovery is still expected.  Unlike ``suspended``,
 								    # ``resume_pending`` preserves the existing session_id on next access —
 								    # the user stays on the same transcript and the agent auto-continues
 								    # from where it left off.  Cleared after the next successful turn.
 								    # Escalation to ``suspended`` is handled by the existing
 								    # ``.restart_failure_counts`` stuck-loop counter (#7536), not by a
 								    # parallel counter on this entry.
 								    resume_pending: bool = False
 								    resume_reason: Optional[str] = None  # e.g. "restart_timeout"
 								    last_resume_marked_at: Optional[datetime] = None
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    def to_dict(self) -> Dict[str, Any]:
 								        result = {
 								            "session_key": self.session_key,
 								            "session_id": self.session_id,
 								            "created_at": self.created_at.isoformat(),
 								            "updated_at": self.updated_at.isoformat(),
 								            "display_name": self.display_name,
 								            "platform": self.platform.value if self.platform else None,
 								            "chat_type": self.chat_type,
 								            "input_tokens": self.input_tokens,
 								            "output_tokens": self.output_tokens,
-												feat: add route-aware pricing estimates (#1695)

Salvaged from PR #1563 by @kshitijk4poor. Cherry-picked with authorship preserved.

- Route-aware pricing architecture replacing static MODEL_PRICING + heuristics
- Canonical usage normalization (Anthropic/OpenAI/Codex API shapes)
- Cache-aware billing (separate cache_read/cache_write rates)
- Cost status tracking (estimated/included/unknown/actual)
- OpenRouter live pricing via models API
- Schema migration v4→v5 with billing metadata columns
- Removed speculative forward-looking entries
- Removed cost display from CLI status bar
- Threaded OpenRouter metadata pre-warm

Co-authored-by: kshitij <82637225+kshitijk4poor@users.noreply.github.com>
											
										
										
											2026-03-17 03:44:44 -07:00
+								            "cache_read_tokens": self.cache_read_tokens,
 								            "cache_write_tokens": self.cache_write_tokens,
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            "total_tokens": self.total_tokens,
-												fix: use actual API token counts for gateway compression pre-check

Root cause of aggressive gateway compression vs CLI:
- CLI: single AIAgent persists across conversation, uses real API-reported
  prompt_tokens for compression decisions — accurate
- Gateway: each message creates fresh AIAgent, token count discarded after,
  next message pre-check falls back to rough str(msg)//4 estimate which
  overestimates 30-50% on tool-heavy conversations

Fix:
- Add last_prompt_tokens field to SessionEntry — stores the actual
  API-reported prompt token count from the most recent agent turn
- After run_conversation(), extract context_compressor.last_prompt_tokens
  and persist it via update_session()
- Gateway pre-check now uses stored actual tokens when available (exact
  same accuracy as CLI), falling back to rough estimate with 1.4x safety
  factor only for the first message of a session

This makes gateway compression behave identically to CLI compression
for all turns after the first. Reported by TigerHix.

											
										
										
											2026-03-10 23:28:18 -07:00
+								            "last_prompt_tokens": self.last_prompt_tokens,
-												feat: add route-aware pricing estimates (#1695)

Salvaged from PR #1563 by @kshitijk4poor. Cherry-picked with authorship preserved.

- Route-aware pricing architecture replacing static MODEL_PRICING + heuristics
- Canonical usage normalization (Anthropic/OpenAI/Codex API shapes)
- Cache-aware billing (separate cache_read/cache_write rates)
- Cost status tracking (estimated/included/unknown/actual)
- OpenRouter live pricing via models API
- Schema migration v4→v5 with billing metadata columns
- Removed speculative forward-looking entries
- Removed cost display from CLI status bar
- Threaded OpenRouter metadata pre-warm

Co-authored-by: kshitij <82637225+kshitijk4poor@users.noreply.github.com>
											
										
										
											2026-03-17 03:44:44 -07:00
+								            "estimated_cost_usd": self.estimated_cost_usd,
 								            "cost_status": self.cost_status,
-												refactor(memory): remove flush_memories entirely (#15696)

The AIAgent.flush_memories pre-compression save, the gateway
_flush_memories_for_session, and everything feeding them are
obsolete now that the background memory/skill review handles
persistent memory extraction.

Problems with flush_memories:

- Pre-dates the background review loop.  It was the only memory-save
  path when introduced; the background review now fires every 10 user
  turns on CLI and gateway alike, which is far more frequent than
  compression or session reset ever triggered flush.
- Blocking and synchronous.  Pre-compression flush ran on the live agent
  before compression, blocking the user-visible response.
- Cache-breaking.  Flush built a temporary conversation prefix
  (system prompt + memory-only tool list) that diverged from the live
  conversation's cached prefix, invalidating prompt caching.  The
  gateway variant spawned a fresh AIAgent with its own clean prompt
  for each finalized session — still cache-breaking, just in a
  different process.
- Redundant.  Background review runs in the live conversation's
  session context, gets the same content, writes to the same memory
  store, and doesn't break the cache.  Everything flush_memories
  claimed to preserve is already covered.

What this removes:

- AIAgent.flush_memories() method (~248 LOC in run_agent.py)
- Pre-compression flush call in _compress_context
- flush_memories call sites in cli.py (/new + exit)
- GatewayRunner._flush_memories_for_session + _async_flush_memories
  (and the 3 call sites: session expiry watcher, /new, /resume)
- 'flush_memories' entry from DEFAULT_CONFIG auxiliary tasks,
  hermes tools UI task list, auxiliary_client docstrings
- _memory_flush_min_turns config + init
- #15631's headroom-deduction math in
  _check_compression_model_feasibility (headroom was only needed
  because flush dragged the full main-agent system prompt along;
  the compression summariser sends a single user-role prompt so
  new_threshold = aux_context is safe again)
- The dedicated test files and assertions that exercised
  flush-specific paths

What this renames (with read-time backcompat on sessions.json):

- SessionEntry.memory_flushed -> SessionEntry.expiry_finalized.
  The session-expiry watcher still uses the flag to avoid re-running
  finalize/eviction on the same expired session; the new name
  reflects what it now actually gates.  from_dict() reads
  'expiry_finalized' first, falls back to the legacy 'memory_flushed'
  key so existing sessions.json files upgrade seamlessly.

Supersedes #15631 and #15638.

Tested: 383 targeted tests pass across run_agent/, agent/, cli/,
and gateway/ session-boundary suites.  No behavior regressions —
background memory review continues to handle persistent memory
extraction on both CLI and gateway.
											
										
										
											2026-04-25 08:21:14 -07:00
+								            "expiry_finalized": self.expiry_finalized,
-												fix(gateway): break stuck session resume loops on restart (#7536)

Cherry-picked from PR #7747 with follow-up fixes:
- Narrowed suspend_all_active() to suspend_recently_active() — only
  suspends sessions updated within the last 2 minutes (likely in-flight),
  not all sessions which would unnecessarily reset idle users
- /stop with no running agent no longer suspends the session; only
  actual force-stops mark the session for reset

											
										
										
											2026-04-11 11:59:00 -07:00
+								            "suspended": self.suspended,
-												fix(gateway): auto-resume sessions after drain-timeout restart (#11852) (#12301)

The shutdown banner promised "send any message after restart to resume
where you left off" but the code did the opposite: a drain-timeout
restart skipped the .clean_shutdown marker, which made the next startup
call suspend_recently_active(), which marked the session suspended,
which made get_or_create_session() spawn a fresh session_id with a
'Session automatically reset. Use /resume...' notice — contradicting
the banner.

Introduce a resume_pending state on SessionEntry that is distinct from
suspended. Drain-timeout shutdown flags active sessions resume_pending
instead of letting startup-wide suspension destroy them. The next
message on the same session_key preserves the session_id, reloads the
transcript, and the agent receives a reason-aware restart-resume
system note that subsumes the existing tool-tail auto-continue note
(PR #9934).

Terminal escalation still flows through the existing
.restart_failure_counts stuck-loop counter (PR #7536, threshold 3) —
no parallel counter on SessionEntry. suspended still wins over
resume_pending in get_or_create_session() so genuinely stuck sessions
converge to a clean slate.

Spec: PR #11852 (BrennerSpear). Implementation follows the spec with
the approved correction (reuse .restart_failure_counts rather than
adding a resume_attempts field).

Changes:
- gateway/session.py: SessionEntry.resume_pending/resume_reason/
  last_resume_marked_at + to_dict/from_dict; SessionStore
  .mark_resume_pending()/clear_resume_pending(); get_or_create_session()
  returns existing entry when resume_pending (suspended still wins);
  suspend_recently_active() skips resume_pending entries.
- gateway/run.py: _stop_impl() drain-timeout branch marks active
  sessions resume_pending before _interrupt_running_agents();
  _run_agent() injects reason-aware restart-resume system note that
  subsumes the tool-tail case; successful-turn cleanup also clears
  resume_pending next to _clear_restart_failure_count();
  _notify_active_sessions_of_shutdown() softens the restart banner to
  'I'll try to resume where you left off' (honest about stuck-loop
  escalation).
- tests/gateway/test_restart_resume_pending.py: 29 new tests covering
  SessionEntry roundtrip, mark/clear helpers, get_or_create_session
  precedence (suspended > resume_pending), suspend_recently_active
  skip, drain-timeout mark reason (restart vs shutdown), system-note
  injection decision tree (including tool-tail subsumption), banner
  wording, and stuck-loop escalation override.
											
										
										
											2026-04-18 17:32:17 -07:00
+								            "resume_pending": self.resume_pending,
 								            "resume_reason": self.resume_reason,
 								            "last_resume_marked_at": (
 								                self.last_resume_marked_at.isoformat()
 								                if self.last_resume_marked_at
 								                else None
 								            ),
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        }
 								        if self.origin:
 								            result["origin"] = self.origin.to_dict()
 								        return result
 								    @classmethod
 								    def from_dict(cls, data: Dict[str, Any]) -> "SessionEntry":
 								        origin = None
 								        if "origin" in data and data["origin"]:
 								            origin = SessionSource.from_dict(data["origin"])
 								        platform = None
 								        if data.get("platform"):
 								            try:
 								                platform = Platform(data["platform"])
-												fix: replace silent exception swallowing with debug logging across tools

Add logger.debug() calls to 27 bare 'except: pass' blocks across 7 core
files, giving visibility into errors that were previously silently
swallowed. This makes it much easier to diagnose user-reported issues
from debug logs.

Files changed:
- tools/terminal_tool.py: 5 catches (stat, termios, fd close, cleanup)
- tools/delegate_tool.py: 7 catches + added logger (spinner, callbacks)
- tools/browser_tool.py: 5 catches (screenshot/recording cleanup, daemon kill)
- tools/code_execution_tool.py: 2 remaining catches (socket, server close)
- gateway/session.py: 2 catches (platform enum parse, temp file cleanup)
- agent/display.py: 2 catches + added logger (JSON parse in failure detect)
- agent/prompt_builder.py: 1 catch (skill description read)

Deliberately kept bare pass for:
- ImportError checks for optional dependencies (terminal_tool.py)
- SystemExit/KeyboardInterrupt handlers
- Spinner _write catch (would spam on every frame when stdout closed)
- process_registry PID-alive check (canonical os.kill(pid,0) pattern)

Extends the pattern from PR #686 (@aydnOktay).

											
										
										
											2026-03-10 06:59:20 -07:00
+								            except ValueError as e:
 								                logger.debug("Unknown platform value %r: %s", data["platform"], e)
-												fix(gateway): auto-resume sessions after drain-timeout restart (#11852) (#12301)

The shutdown banner promised "send any message after restart to resume
where you left off" but the code did the opposite: a drain-timeout
restart skipped the .clean_shutdown marker, which made the next startup
call suspend_recently_active(), which marked the session suspended,
which made get_or_create_session() spawn a fresh session_id with a
'Session automatically reset. Use /resume...' notice — contradicting
the banner.

Introduce a resume_pending state on SessionEntry that is distinct from
suspended. Drain-timeout shutdown flags active sessions resume_pending
instead of letting startup-wide suspension destroy them. The next
message on the same session_key preserves the session_id, reloads the
transcript, and the agent receives a reason-aware restart-resume
system note that subsumes the existing tool-tail auto-continue note
(PR #9934).

Terminal escalation still flows through the existing
.restart_failure_counts stuck-loop counter (PR #7536, threshold 3) —
no parallel counter on SessionEntry. suspended still wins over
resume_pending in get_or_create_session() so genuinely stuck sessions
converge to a clean slate.

Spec: PR #11852 (BrennerSpear). Implementation follows the spec with
the approved correction (reuse .restart_failure_counts rather than
adding a resume_attempts field).

Changes:
- gateway/session.py: SessionEntry.resume_pending/resume_reason/
  last_resume_marked_at + to_dict/from_dict; SessionStore
  .mark_resume_pending()/clear_resume_pending(); get_or_create_session()
  returns existing entry when resume_pending (suspended still wins);
  suspend_recently_active() skips resume_pending entries.
- gateway/run.py: _stop_impl() drain-timeout branch marks active
  sessions resume_pending before _interrupt_running_agents();
  _run_agent() injects reason-aware restart-resume system note that
  subsumes the tool-tail case; successful-turn cleanup also clears
  resume_pending next to _clear_restart_failure_count();
  _notify_active_sessions_of_shutdown() softens the restart banner to
  'I'll try to resume where you left off' (honest about stuck-loop
  escalation).
- tests/gateway/test_restart_resume_pending.py: 29 new tests covering
  SessionEntry roundtrip, mark/clear helpers, get_or_create_session
  precedence (suspended > resume_pending), suspend_recently_active
  skip, drain-timeout mark reason (restart vs shutdown), system-note
  injection decision tree (including tool-tail subsumption), banner
  wording, and stuck-loop escalation override.
											
										
										
											2026-04-18 17:32:17 -07:00
 								        last_resume_marked_at = None
 								        _lrma = data.get("last_resume_marked_at")
 								        if _lrma:
 								            try:
 								                last_resume_marked_at = datetime.fromisoformat(_lrma)
 								            except (TypeError, ValueError):
 								                last_resume_marked_at = None
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        return cls(
 								            session_key=data["session_key"],
 								            session_id=data["session_id"],
 								            created_at=datetime.fromisoformat(data["created_at"]),
 								            updated_at=datetime.fromisoformat(data["updated_at"]),
 								            origin=origin,
 								            display_name=data.get("display_name"),
 								            platform=platform,
 								            chat_type=data.get("chat_type", "dm"),
 								            input_tokens=data.get("input_tokens", 0),
 								            output_tokens=data.get("output_tokens", 0),
-												feat: add route-aware pricing estimates (#1695)

Salvaged from PR #1563 by @kshitijk4poor. Cherry-picked with authorship preserved.

- Route-aware pricing architecture replacing static MODEL_PRICING + heuristics
- Canonical usage normalization (Anthropic/OpenAI/Codex API shapes)
- Cache-aware billing (separate cache_read/cache_write rates)
- Cost status tracking (estimated/included/unknown/actual)
- OpenRouter live pricing via models API
- Schema migration v4→v5 with billing metadata columns
- Removed speculative forward-looking entries
- Removed cost display from CLI status bar
- Threaded OpenRouter metadata pre-warm

Co-authored-by: kshitij <82637225+kshitijk4poor@users.noreply.github.com>
											
										
										
											2026-03-17 03:44:44 -07:00
+								            cache_read_tokens=data.get("cache_read_tokens", 0),
 								            cache_write_tokens=data.get("cache_write_tokens", 0),
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            total_tokens=data.get("total_tokens", 0),
-												fix: use actual API token counts for gateway compression pre-check

Root cause of aggressive gateway compression vs CLI:
- CLI: single AIAgent persists across conversation, uses real API-reported
  prompt_tokens for compression decisions — accurate
- Gateway: each message creates fresh AIAgent, token count discarded after,
  next message pre-check falls back to rough str(msg)//4 estimate which
  overestimates 30-50% on tool-heavy conversations

Fix:
- Add last_prompt_tokens field to SessionEntry — stores the actual
  API-reported prompt token count from the most recent agent turn
- After run_conversation(), extract context_compressor.last_prompt_tokens
  and persist it via update_session()
- Gateway pre-check now uses stored actual tokens when available (exact
  same accuracy as CLI), falling back to rough estimate with 1.4x safety
  factor only for the first message of a session

This makes gateway compression behave identically to CLI compression
for all turns after the first. Reported by TigerHix.

											
										
										
											2026-03-10 23:28:18 -07:00
+								            last_prompt_tokens=data.get("last_prompt_tokens", 0),
-												feat: add route-aware pricing estimates (#1695)

Salvaged from PR #1563 by @kshitijk4poor. Cherry-picked with authorship preserved.

- Route-aware pricing architecture replacing static MODEL_PRICING + heuristics
- Canonical usage normalization (Anthropic/OpenAI/Codex API shapes)
- Cache-aware billing (separate cache_read/cache_write rates)
- Cost status tracking (estimated/included/unknown/actual)
- OpenRouter live pricing via models API
- Schema migration v4→v5 with billing metadata columns
- Removed speculative forward-looking entries
- Removed cost display from CLI status bar
- Threaded OpenRouter metadata pre-warm

Co-authored-by: kshitij <82637225+kshitijk4poor@users.noreply.github.com>
											
										
										
											2026-03-17 03:44:44 -07:00
+								            estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
 								            cost_status=data.get("cost_status", "unknown"),
-												refactor(memory): remove flush_memories entirely (#15696)

The AIAgent.flush_memories pre-compression save, the gateway
_flush_memories_for_session, and everything feeding them are
obsolete now that the background memory/skill review handles
persistent memory extraction.

Problems with flush_memories:

- Pre-dates the background review loop.  It was the only memory-save
  path when introduced; the background review now fires every 10 user
  turns on CLI and gateway alike, which is far more frequent than
  compression or session reset ever triggered flush.
- Blocking and synchronous.  Pre-compression flush ran on the live agent
  before compression, blocking the user-visible response.
- Cache-breaking.  Flush built a temporary conversation prefix
  (system prompt + memory-only tool list) that diverged from the live
  conversation's cached prefix, invalidating prompt caching.  The
  gateway variant spawned a fresh AIAgent with its own clean prompt
  for each finalized session — still cache-breaking, just in a
  different process.
- Redundant.  Background review runs in the live conversation's
  session context, gets the same content, writes to the same memory
  store, and doesn't break the cache.  Everything flush_memories
  claimed to preserve is already covered.

What this removes:

- AIAgent.flush_memories() method (~248 LOC in run_agent.py)
- Pre-compression flush call in _compress_context
- flush_memories call sites in cli.py (/new + exit)
- GatewayRunner._flush_memories_for_session + _async_flush_memories
  (and the 3 call sites: session expiry watcher, /new, /resume)
- 'flush_memories' entry from DEFAULT_CONFIG auxiliary tasks,
  hermes tools UI task list, auxiliary_client docstrings
- _memory_flush_min_turns config + init
- #15631's headroom-deduction math in
  _check_compression_model_feasibility (headroom was only needed
  because flush dragged the full main-agent system prompt along;
  the compression summariser sends a single user-role prompt so
  new_threshold = aux_context is safe again)
- The dedicated test files and assertions that exercised
  flush-specific paths

What this renames (with read-time backcompat on sessions.json):

- SessionEntry.memory_flushed -> SessionEntry.expiry_finalized.
  The session-expiry watcher still uses the flag to avoid re-running
  finalize/eviction on the same expired session; the new name
  reflects what it now actually gates.  from_dict() reads
  'expiry_finalized' first, falls back to the legacy 'memory_flushed'
  key so existing sessions.json files upgrade seamlessly.

Supersedes #15631 and #15638.

Tested: 383 targeted tests pass across run_agent/, agent/, cli/,
and gateway/ session-boundary suites.  No behavior regressions —
background memory review continues to handle persistent memory
extraction on both CLI and gateway.
											
										
										
											2026-04-25 08:21:14 -07:00
+								            expiry_finalized=data.get("expiry_finalized", data.get("memory_flushed", False)),
-												fix(gateway): break stuck session resume loops on restart (#7536)

Cherry-picked from PR #7747 with follow-up fixes:
- Narrowed suspend_all_active() to suspend_recently_active() — only
  suspends sessions updated within the last 2 minutes (likely in-flight),
  not all sessions which would unnecessarily reset idle users
- /stop with no running agent no longer suspends the session; only
  actual force-stops mark the session for reset

											
										
										
											2026-04-11 11:59:00 -07:00
+								            suspended=data.get("suspended", False),
-												fix(gateway): auto-resume sessions after drain-timeout restart (#11852) (#12301)

The shutdown banner promised "send any message after restart to resume
where you left off" but the code did the opposite: a drain-timeout
restart skipped the .clean_shutdown marker, which made the next startup
call suspend_recently_active(), which marked the session suspended,
which made get_or_create_session() spawn a fresh session_id with a
'Session automatically reset. Use /resume...' notice — contradicting
the banner.

Introduce a resume_pending state on SessionEntry that is distinct from
suspended. Drain-timeout shutdown flags active sessions resume_pending
instead of letting startup-wide suspension destroy them. The next
message on the same session_key preserves the session_id, reloads the
transcript, and the agent receives a reason-aware restart-resume
system note that subsumes the existing tool-tail auto-continue note
(PR #9934).

Terminal escalation still flows through the existing
.restart_failure_counts stuck-loop counter (PR #7536, threshold 3) —
no parallel counter on SessionEntry. suspended still wins over
resume_pending in get_or_create_session() so genuinely stuck sessions
converge to a clean slate.

Spec: PR #11852 (BrennerSpear). Implementation follows the spec with
the approved correction (reuse .restart_failure_counts rather than
adding a resume_attempts field).

Changes:
- gateway/session.py: SessionEntry.resume_pending/resume_reason/
  last_resume_marked_at + to_dict/from_dict; SessionStore
  .mark_resume_pending()/clear_resume_pending(); get_or_create_session()
  returns existing entry when resume_pending (suspended still wins);
  suspend_recently_active() skips resume_pending entries.
- gateway/run.py: _stop_impl() drain-timeout branch marks active
  sessions resume_pending before _interrupt_running_agents();
  _run_agent() injects reason-aware restart-resume system note that
  subsumes the tool-tail case; successful-turn cleanup also clears
  resume_pending next to _clear_restart_failure_count();
  _notify_active_sessions_of_shutdown() softens the restart banner to
  'I'll try to resume where you left off' (honest about stuck-loop
  escalation).
- tests/gateway/test_restart_resume_pending.py: 29 new tests covering
  SessionEntry roundtrip, mark/clear helpers, get_or_create_session
  precedence (suspended > resume_pending), suspend_recently_active
  skip, drain-timeout mark reason (restart vs shutdown), system-note
  injection decision tree (including tool-tail subsumption), banner
  wording, and stuck-loop escalation override.
											
										
										
											2026-04-18 17:32:17 -07:00
+								            resume_pending=data.get("resume_pending", False),
 								            resume_reason=data.get("resume_reason"),
 								            last_resume_marked_at=last_resume_marked_at,
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        )
-												fix(gateway): preserve sender attribution in shared group sessions

Generalize shared multi-user session handling so non-thread group sessions
(group_sessions_per_user=False) get the same treatment as shared threads:
inbound messages are prefixed with [sender name], and the session prompt
shows a multi-user note instead of pinning a single **User:** line into
the cached system prompt.

Before: build_session_key already treated these as shared sessions, but
_prepare_inbound_message_text and build_session_context_prompt only
recognized shared threads — creating cross-user attribution drift and
prompt-cache contamination in shared groups.

- Add is_shared_multi_user_session() helper alongside build_session_key()
  so both the session key and the multi-user branches are driven by the
  same rules (DMs never shared, threads shared unless
  thread_sessions_per_user, groups shared unless group_sessions_per_user).
- Add shared_multi_user_session field to SessionContext, populated by
  build_session_context() from config.
- Use context.shared_multi_user_session in the prompt builder (label is
  'Multi-user thread' when a thread is present, 'Multi-user session'
  otherwise).
- Use the helper in _prepare_inbound_message_text so non-thread shared
  groups also get [sender] prefixes.

Default behavior unchanged: DMs stay single-user, groups with
group_sessions_per_user=True still show the user normally, shared threads
keep their existing multi-user behavior.

Tests (65 passed):
- tests/gateway/test_session.py: new shared non-thread group prompt case.
- tests/gateway/test_shared_group_sender_prefix.py: inbound preprocessing
  for shared non-thread groups and default groups.

											
										
										
											2026-04-21 00:37:26 -07:00
+								def is_shared_multi_user_session(
 								    source: SessionSource,
 								    *,
 								    group_sessions_per_user: bool = True,
 								    thread_sessions_per_user: bool = False,
 								) -> bool:
 								    """Return True when a non-DM session is shared across participants.
 								    Mirrors the isolation rules in :func:`build_session_key`:
 								      - DMs are never shared.
 								      - Threads are shared unless ``thread_sessions_per_user`` is True.
 								      - Non-thread group/channel sessions are shared unless
 								        ``group_sessions_per_user`` is True (default: True = isolated).
 								    """
 								    if source.chat_type == "dm":
 								        return False
 								    if source.thread_id:
 								        return not thread_sessions_per_user
 								    return not group_sessions_per_user
-												feat: shared thread sessions by default — multi-user thread support (#5391)

Threads (Telegram forum topics, Discord threads, Slack threads) now default
to shared sessions where all participants see the same conversation. This is
the expected UX for threaded conversations where multiple users @mention the
bot and interact collaboratively.

Changes:
- build_session_key(): when thread_id is present, user_id is no longer
  appended to the session key (threads are shared by default)
- New config: thread_sessions_per_user (default: false) — opt-in to restore
  per-user isolation in threads if needed
- Sender attribution: messages in shared threads are prefixed with
  [sender name] so the agent can tell participants apart
- System prompt: shared threads show 'Multi-user thread' note instead of
  a per-turn User line (avoids busting prompt cache)
- Wired through all callers: gateway/run.py, base.py, telegram.py, feishu.py
- Regular group messages (no thread) remain per-user isolated (unchanged)
- DM threads are unaffected (they have their own keying logic)

Closes community request from demontut_ re: thread-based shared sessions.
											
										
										
											2026-04-05 19:46:58 -07:00
+								def build_session_key(
 								    source: SessionSource,
 								    group_sessions_per_user: bool = True,
 								    thread_sessions_per_user: bool = False,
 								) -> str:
-												refactor: extract build_session_key() as single source of truth

The session key construction logic was duplicated in 4 places
(session.py + 3 inline copies in run.py), which is exactly the
kind of drift that caused issue #349 in the first place.

Extracted build_session_key() as a public function in session.py.
SessionStore._generate_session_key() now delegates to it, and all
inline key construction in run.py has been replaced with calls to
the shared function. Tests updated to test the function directly.

											
										
										
											2026-03-04 03:34:45 -08:00
+								    """Build a deterministic session key from a message source.
 								    This is the single source of truth for session key construction.
-												fix: Slack thread handling — progress messages, responses, and session isolation

Three bugs fixed in the Slack adapter:

1. Tool progress messages leaked to main channel instead of thread.
   Root cause: metadata key mismatch — gateway uses 'thread_id' but
   Slack adapter checked for 'thread_ts'. Added _resolve_thread_ts()
   helper that checks both keys with correct precedence.

2. Bot responses could escape threads for replies.
   Root cause: reply_to was set to the child message's ts, but Slack
   API needs the parent message's ts for thread_ts. Now metadata
   thread_id (always the parent ts) takes priority over reply_to.

3. All Slack DMs shared one session key ('agent:main:slack:dm'),
   so a long-running task blocked all other DM conversations.
   Fix: DMs with thread_id now get per-thread session keys. Top-level
   DMs still share one session for conversation continuity.

Additional fix: All Slack media methods (send_image, send_voice,
send_video, send_document, send_image_file) now accept metadata
parameter for thread routing. Previously they only accepted reply_to,
which caused media to silently fail to post in threads.

Session key behavior after this change:
- Slack channel @mention: creates thread, thread = session
- Slack thread reply: stays in thread, same session
- Slack DM (top-level): one continuous session
- Slack DM (threaded): per-thread session
- Other platforms: unchanged

											
										
										
											2026-03-12 16:05:45 -07:00
 								    DM rules:
-												fix(gateway): enforce chat_id isolation for all DM sessions

											
										
										
											2026-03-12 16:21:49 +01:00
+								      - DMs include chat_id when present, so each private conversation is isolated.
 								      - thread_id further differentiates threaded DMs within the same DM chat.
 								      - Without chat_id, thread_id is used as a best-effort fallback.
 								      - Without thread_id or chat_id, DMs share a single session.
-												fix: Slack thread handling — progress messages, responses, and session isolation

Three bugs fixed in the Slack adapter:

1. Tool progress messages leaked to main channel instead of thread.
   Root cause: metadata key mismatch — gateway uses 'thread_id' but
   Slack adapter checked for 'thread_ts'. Added _resolve_thread_ts()
   helper that checks both keys with correct precedence.

2. Bot responses could escape threads for replies.
   Root cause: reply_to was set to the child message's ts, but Slack
   API needs the parent message's ts for thread_ts. Now metadata
   thread_id (always the parent ts) takes priority over reply_to.

3. All Slack DMs shared one session key ('agent:main:slack:dm'),
   so a long-running task blocked all other DM conversations.
   Fix: DMs with thread_id now get per-thread session keys. Top-level
   DMs still share one session for conversation continuity.

Additional fix: All Slack media methods (send_image, send_voice,
send_video, send_document, send_image_file) now accept metadata
parameter for thread routing. Previously they only accepted reply_to,
which caused media to silently fail to post in threads.

Session key behavior after this change:
- Slack channel @mention: creates thread, thread = session
- Slack thread reply: stays in thread, same session
- Slack DM (top-level): one continuous session
- Slack DM (threaded): per-thread session
- Other platforms: unchanged

											
										
										
											2026-03-12 16:05:45 -07:00
 								    Group/channel rules:
-												fix(gateway): enforce chat_id isolation for all DM sessions

											
										
										
											2026-03-12 16:21:49 +01:00
+								      - chat_id identifies the parent group/channel.
-												fix(gateway): make group session isolation configurable

default group and channel sessions to per-user isolation, allow opting back into shared room sessions via config.yaml, and document Discord gateway routing and session behavior.

											
										
										
											2026-03-16 00:22:23 -07:00
+								      - user_id/user_id_alt isolates participants within that parent chat when available when
 								        ``group_sessions_per_user`` is enabled.
-												feat: shared thread sessions by default — multi-user thread support (#5391)

Threads (Telegram forum topics, Discord threads, Slack threads) now default
to shared sessions where all participants see the same conversation. This is
the expected UX for threaded conversations where multiple users @mention the
bot and interact collaboratively.

Changes:
- build_session_key(): when thread_id is present, user_id is no longer
  appended to the session key (threads are shared by default)
- New config: thread_sessions_per_user (default: false) — opt-in to restore
  per-user isolation in threads if needed
- Sender attribution: messages in shared threads are prefixed with
  [sender name] so the agent can tell participants apart
- System prompt: shared threads show 'Multi-user thread' note instead of
  a per-turn User line (avoids busting prompt cache)
- Wired through all callers: gateway/run.py, base.py, telegram.py, feishu.py
- Regular group messages (no thread) remain per-user isolated (unchanged)
- DM threads are unaffected (they have their own keying logic)

Closes community request from demontut_ re: thread-based shared sessions.
											
										
										
											2026-04-05 19:46:58 -07:00
+								      - thread_id differentiates threads within that parent chat.  When
 								        ``thread_sessions_per_user`` is False (default), threads are *shared* across all
 								        participants — user_id is NOT appended, so every user in the thread
 								        shares a single session.  This is the expected UX for threaded
 								        conversations (Telegram forum topics, Discord threads, Slack threads).
-												fix(gateway): make group session isolation configurable

default group and channel sessions to per-user isolation, allow opting back into shared room sessions via config.yaml, and document Discord gateway routing and session behavior.

											
										
										
											2026-03-16 00:22:23 -07:00
+								      - Without participant identifiers, or when isolation is disabled, messages fall back to one
 								        shared session per chat.
-												fix(gateway): enforce chat_id isolation for all DM sessions

											
										
										
											2026-03-12 16:21:49 +01:00
+								      - Without identifiers, messages fall back to one session per platform/chat_type.
-												refactor: extract build_session_key() as single source of truth

The session key construction logic was duplicated in 4 places
(session.py + 3 inline copies in run.py), which is exactly the
kind of drift that caused issue #349 in the first place.

Extracted build_session_key() as a public function in session.py.
SessionStore._generate_session_key() now delegates to it, and all
inline key construction in run.py has been replaced with calls to
the shared function. Tests updated to test the function directly.

											
										
										
											2026-03-04 03:34:45 -08:00
+								    """
 								    platform = source.platform.value
 								    if source.chat_type == "dm":
-												fix(gateway): canonicalize WhatsApp identity in session keys

Hermes' WhatsApp bridge routinely surfaces the same person under either
a phone-format JID (60123456789@s.whatsapp.net) or a LID (…@lid),
and may flip between the two for a single human within the same
conversation. Before this change, build_session_key used the raw
identifier verbatim, so the bridge reshuffling an alias form produced
two distinct session keys for the same person — in two places:

  1. DM chat_id — a user's DM sessions split in half, transcripts and
     per-sender state diverge.
  2. Group participant_id (with group_sessions_per_user enabled) — a
     member's per-user session inside a group splits in half for the
     same reason.

Add a canonicalizer that walks the bridge's lid-mapping-*.json files
and picks the shortest/numeric-preferred alias as the stable identity.
build_session_key now routes both the DM chat_id and the group
participant_id through this helper when the platform is WhatsApp.
All other platforms and chat types are untouched.

Expose canonical_whatsapp_identifier and normalize_whatsapp_identifier
as public helpers. Plugins that need per-sender behaviour (role-based
routing, per-contact authorization, policy gating) need the same
identity resolution Hermes uses internally; without a public helper,
each plugin would have to re-implement the walker against the bridge's
internal on-disk format. Keeping this alongside build_session_key
makes it authoritative and one refactor away if the bridge ever
changes shape.

_expand_whatsapp_aliases stays private — it's an implementation detail
of how the mapping files are walked, not a contract callers should
depend on.


											
										
										
											2026-04-24 18:40:20 +08:00
+								        dm_chat_id = source.chat_id
 								        if source.platform == Platform.WHATSAPP:
 								            dm_chat_id = canonical_whatsapp_identifier(source.chat_id)
 								        if dm_chat_id:
-												fix(gateway): enforce chat_id isolation for all DM sessions

											
										
										
											2026-03-12 16:21:49 +01:00
+								            if source.thread_id:
-												fix(gateway): canonicalize WhatsApp identity in session keys

Hermes' WhatsApp bridge routinely surfaces the same person under either
a phone-format JID (60123456789@s.whatsapp.net) or a LID (…@lid),
and may flip between the two for a single human within the same
conversation. Before this change, build_session_key used the raw
identifier verbatim, so the bridge reshuffling an alias form produced
two distinct session keys for the same person — in two places:

  1. DM chat_id — a user's DM sessions split in half, transcripts and
     per-sender state diverge.
  2. Group participant_id (with group_sessions_per_user enabled) — a
     member's per-user session inside a group splits in half for the
     same reason.

Add a canonicalizer that walks the bridge's lid-mapping-*.json files
and picks the shortest/numeric-preferred alias as the stable identity.
build_session_key now routes both the DM chat_id and the group
participant_id through this helper when the platform is WhatsApp.
All other platforms and chat types are untouched.

Expose canonical_whatsapp_identifier and normalize_whatsapp_identifier
as public helpers. Plugins that need per-sender behaviour (role-based
routing, per-contact authorization, policy gating) need the same
identity resolution Hermes uses internally; without a public helper,
each plugin would have to re-implement the walker against the bridge's
internal on-disk format. Keeping this alongside build_session_key
makes it authoritative and one refactor away if the bridge ever
changes shape.

_expand_whatsapp_aliases stays private — it's an implementation detail
of how the mapping files are walked, not a contract callers should
depend on.


											
										
										
											2026-04-24 18:40:20 +08:00
+								                return f"agent:main:{platform}:dm:{dm_chat_id}:{source.thread_id}"
 								            return f"agent:main:{platform}:dm:{dm_chat_id}"
-												fix: Slack thread handling — progress messages, responses, and session isolation

Three bugs fixed in the Slack adapter:

1. Tool progress messages leaked to main channel instead of thread.
   Root cause: metadata key mismatch — gateway uses 'thread_id' but
   Slack adapter checked for 'thread_ts'. Added _resolve_thread_ts()
   helper that checks both keys with correct precedence.

2. Bot responses could escape threads for replies.
   Root cause: reply_to was set to the child message's ts, but Slack
   API needs the parent message's ts for thread_ts. Now metadata
   thread_id (always the parent ts) takes priority over reply_to.

3. All Slack DMs shared one session key ('agent:main:slack:dm'),
   so a long-running task blocked all other DM conversations.
   Fix: DMs with thread_id now get per-thread session keys. Top-level
   DMs still share one session for conversation continuity.

Additional fix: All Slack media methods (send_image, send_voice,
send_video, send_document, send_image_file) now accept metadata
parameter for thread routing. Previously they only accepted reply_to,
which caused media to silently fail to post in threads.

Session key behavior after this change:
- Slack channel @mention: creates thread, thread = session
- Slack thread reply: stays in thread, same session
- Slack DM (top-level): one continuous session
- Slack DM (threaded): per-thread session
- Other platforms: unchanged

											
										
										
											2026-03-12 16:05:45 -07:00
+								        if source.thread_id:
 								            return f"agent:main:{platform}:dm:{source.thread_id}"
-												refactor: extract build_session_key() as single source of truth

The session key construction logic was duplicated in 4 places
(session.py + 3 inline copies in run.py), which is exactly the
kind of drift that caused issue #349 in the first place.

Extracted build_session_key() as a public function in session.py.
SessionStore._generate_session_key() now delegates to it, and all
inline key construction in run.py has been replaced with calls to
the shared function. Tests updated to test the function directly.

											
										
										
											2026-03-04 03:34:45 -08:00
+								        return f"agent:main:{platform}:dm"
-												fix(gateway): isolate group sessions per user

Include participant identifiers in non-DM session keys when available so group and channel conversations no longer share one transcript across every active user in the chat.

											
										
										
											2026-03-15 23:08:56 -07:00
 								    participant_id = source.user_id_alt or source.user_id
-												fix(gateway): canonicalize WhatsApp identity in session keys

Hermes' WhatsApp bridge routinely surfaces the same person under either
a phone-format JID (60123456789@s.whatsapp.net) or a LID (…@lid),
and may flip between the two for a single human within the same
conversation. Before this change, build_session_key used the raw
identifier verbatim, so the bridge reshuffling an alias form produced
two distinct session keys for the same person — in two places:

  1. DM chat_id — a user's DM sessions split in half, transcripts and
     per-sender state diverge.
  2. Group participant_id (with group_sessions_per_user enabled) — a
     member's per-user session inside a group splits in half for the
     same reason.

Add a canonicalizer that walks the bridge's lid-mapping-*.json files
and picks the shortest/numeric-preferred alias as the stable identity.
build_session_key now routes both the DM chat_id and the group
participant_id through this helper when the platform is WhatsApp.
All other platforms and chat types are untouched.

Expose canonical_whatsapp_identifier and normalize_whatsapp_identifier
as public helpers. Plugins that need per-sender behaviour (role-based
routing, per-contact authorization, policy gating) need the same
identity resolution Hermes uses internally; without a public helper,
each plugin would have to re-implement the walker against the bridge's
internal on-disk format. Keeping this alongside build_session_key
makes it authoritative and one refactor away if the bridge ever
changes shape.

_expand_whatsapp_aliases stays private — it's an implementation detail
of how the mapping files are walked, not a contract callers should
depend on.


											
										
										
											2026-04-24 18:40:20 +08:00
+								    if participant_id and source.platform == Platform.WHATSAPP:
 								        # Same JID/LID-flip bug as the DM case: without canonicalisation, a
 								        # single group member gets two isolated per-user sessions when the
 								        # bridge reshuffles alias forms.
 								        participant_id = canonical_whatsapp_identifier(str(participant_id)) or participant_id
-												fix(gateway): isolate group sessions per user

Include participant identifiers in non-DM session keys when available so group and channel conversations no longer share one transcript across every active user in the chat.

											
										
										
											2026-03-15 23:08:56 -07:00
+								    key_parts = ["agent:main", platform, source.chat_type]
-												fix(gateway): enforce chat_id isolation for all DM sessions

											
										
										
											2026-03-12 16:21:49 +01:00
+								    if source.chat_id:
-												fix(gateway): isolate group sessions per user

Include participant identifiers in non-DM session keys when available so group and channel conversations no longer share one transcript across every active user in the chat.

											
										
										
											2026-03-15 23:08:56 -07:00
+								        key_parts.append(source.chat_id)
-												fix(gateway): isolate telegram forum topic sessions

											
										
										
											2026-03-11 09:15:34 +01:00
+								    if source.thread_id:
-												fix(gateway): isolate group sessions per user

Include participant identifiers in non-DM session keys when available so group and channel conversations no longer share one transcript across every active user in the chat.

											
										
										
											2026-03-15 23:08:56 -07:00
+								        key_parts.append(source.thread_id)
-												feat: shared thread sessions by default — multi-user thread support (#5391)

Threads (Telegram forum topics, Discord threads, Slack threads) now default
to shared sessions where all participants see the same conversation. This is
the expected UX for threaded conversations where multiple users @mention the
bot and interact collaboratively.

Changes:
- build_session_key(): when thread_id is present, user_id is no longer
  appended to the session key (threads are shared by default)
- New config: thread_sessions_per_user (default: false) — opt-in to restore
  per-user isolation in threads if needed
- Sender attribution: messages in shared threads are prefixed with
  [sender name] so the agent can tell participants apart
- System prompt: shared threads show 'Multi-user thread' note instead of
  a per-turn User line (avoids busting prompt cache)
- Wired through all callers: gateway/run.py, base.py, telegram.py, feishu.py
- Regular group messages (no thread) remain per-user isolated (unchanged)
- DM threads are unaffected (they have their own keying logic)

Closes community request from demontut_ re: thread-based shared sessions.
											
										
										
											2026-04-05 19:46:58 -07:00
 								    # In threads, default to shared sessions (all participants see the same
 								    # conversation).  Per-user isolation only applies when explicitly enabled
 								    # via thread_sessions_per_user, or when there is no thread (regular group).
 								    isolate_user = group_sessions_per_user
 								    if source.thread_id and not thread_sessions_per_user:
 								        isolate_user = False
 								    if isolate_user and participant_id:
-												fix(gateway): isolate group sessions per user

Include participant identifiers in non-DM session keys when available so group and channel conversations no longer share one transcript across every active user in the chat.

											
										
										
											2026-03-15 23:08:56 -07:00
+								        key_parts.append(str(participant_id))
 								    return ":".join(key_parts)
-												refactor: extract build_session_key() as single source of truth

The session key construction logic was duplicated in 4 places
(session.py + 3 inline copies in run.py), which is exactly the
kind of drift that caused issue #349 in the first place.

Extracted build_session_key() as a public function in session.py.
SessionStore._generate_session_key() now delegates to it, and all
inline key construction in run.py has been replaced with calls to
the shared function. Tests updated to test the function directly.

											
										
										
											2026-03-04 03:34:45 -08:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								class SessionStore:
 								    """
 								    Manages session storage and retrieval.
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								    Uses SQLite (via SessionDB) for session metadata and message transcripts.
 								    Falls back to legacy JSONL files if SQLite is unavailable.
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    """
-												Add background process management with process tool, wait, PTY, and stdin support

New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).

Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL

Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response

Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)

Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform

RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop

Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview

											
										
										
											2026-02-17 02:51:31 -08:00
+								    def __init__(self, sessions_dir: Path, config: GatewayConfig,
-												fix: remove 115 verified dead code symbols across 46 production files

Automated dead code audit using vulture + coverage.py + ast-grep intersection,
confirmed by Opus deep verification pass. Every symbol verified to have zero
production callers (test imports excluded from reachability analysis).

Removes ~1,534 lines of dead production code across 46 files and ~1,382 lines
of stale test code. 3 entire files deleted (agent/builtin_memory_provider.py,
hermes_cli/checklist.py, tests/hermes_cli/test_setup_model_selection.py).

Co-authored-by: alt-glitch <balyan.sid@gmail.com>

											
										
										
											2026-04-10 03:03:30 -07:00
+								                 has_active_processes_fn=None):
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        self.sessions_dir = sessions_dir
 								        self.config = config
 								        self._entries: Dict[str, SessionEntry] = {}
 								        self._loaded = False
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								        self._lock = threading.Lock()
-												Add background process management with process tool, wait, PTY, and stdin support

New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).

Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL

Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response

Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)

Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform

RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop

Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview

											
										
										
											2026-02-17 02:51:31 -08:00
+								        self._has_active_processes_fn = has_active_processes_fn
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
 								        # Initialize SQLite session database
 								        self._db = None
 								        try:
 								            from hermes_state import SessionDB
 								            self._db = SessionDB()
 								        except Exception as e:
 								            print(f"[gateway] Warning: SQLite session store unavailable, falling back to JSONL: {e}")
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								    def _ensure_loaded(self) -> None:
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								        """Load sessions index from disk if not already loaded."""
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								        with self._lock:
 								            self._ensure_loaded_locked()
 								    def _ensure_loaded_locked(self) -> None:
 								        """Load sessions index from disk. Must be called with self._lock held."""
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        if self._loaded:
 								            return
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        self.sessions_dir.mkdir(parents=True, exist_ok=True)
 								        sessions_file = self.sessions_dir / "sessions.json"
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        if sessions_file.exists():
 								            try:
-												fix(gateway): add missing UTF-8 encoding to file I/O preventing crashes on Windows

On Windows, Python's open() defaults to the system locale encoding
(e.g. cp1254 for Turkish, cp1252 for Western European) instead of
UTF-8. The gateway already uses ensure_ascii=False in json.dumps()
to preserve Unicode characters in chat messages, but the
corresponding open() calls lack encoding="utf-8". This mismatch
causes UnicodeEncodeError / UnicodeDecodeError when users send
non-ASCII messages (Turkish, Japanese, Arabic, emoji, etc.) through
Telegram, Discord, WhatsApp, or Slack on Windows.

The project already fixed this for .env files in hermes_cli/config.py
(line 624) but the gateway module was missed.

Files fixed:
- gateway/session.py: session index + JSONL transcript read/write (5 calls)
- gateway/channel_directory.py: channel directory read/write (3 calls)
- gateway/mirror.py: session index read + transcript append (2 calls)

											
										
										
											2026-03-04 11:32:57 +03:00
+								                with open(sessions_file, "r", encoding="utf-8") as f:
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								                    data = json.load(f)
 								                    for key, entry_data in data.items():
-												fix: address PR review round 4 — remove web UI, fix audio/import/interface issues

Remove web UI gateway (web.py, tests, docs, toolset, env vars, Platform.WEB
enum) per maintainer request — Nous is building their own official chat UI.

Fix 1: Replace sd.wait() with polling pattern in play_audio_file() to prevent
indefinite hang when audio device stalls (consistent with play_beep()).

Fix 2: Use importlib.util.find_spec() for faster_whisper/openai availability
checks instead of module-level imports that trigger heavy native library
loading (CUDA/cuDNN) at import time.

Fix 3: Remove inspect.signature() hack in _send_voice_reply() — add **kwargs
to Telegram send_voice() so all adapters accept metadata uniformly.

Fix 4: Make session loading resilient to removed platform enum values — skip
entries with unknown platforms instead of crashing the entire gateway.

											
										
										
											2026-03-14 09:06:52 +03:00
+								                        try:
 								                            self._entries[key] = SessionEntry.from_dict(entry_data)
 								                        except (ValueError, KeyError):
 								                            # Skip entries with unknown/removed platform values
 								                            continue
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            except Exception as e:
 								                print(f"[gateway] Warning: Failed to load sessions: {e}")
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        self._loaded = True
 								    def _save(self) -> None:
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								        """Save sessions index to disk (kept for session key -> ID mapping)."""
-												fix(session): atomic write for sessions.json to prevent data loss on crash

											
										
										
											2026-03-07 20:54:45 +03:30
+								        import tempfile
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        self.sessions_dir.mkdir(parents=True, exist_ok=True)
 								        sessions_file = self.sessions_dir / "sessions.json"
-												fix(session): atomic write for sessions.json to prevent data loss on crash

											
										
										
											2026-03-07 20:54:45 +03:30
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        data = {key: entry.to_dict() for key, entry in self._entries.items()}
-												fix(session): atomic write for sessions.json to prevent data loss on crash

											
										
										
											2026-03-07 20:54:45 +03:30
+								        fd, tmp_path = tempfile.mkstemp(
 								            dir=str(self.sessions_dir), suffix=".tmp", prefix=".sessions_"
 								        )
 								        try:
-												Merge PR #611: fix(session): atomic write for sessions.json to prevent data loss on crash

Authored by alireza78a.

Replaces open('w') + json.dump with tempfile.mkstemp + os.replace atomic
write pattern, matching the existing pattern in cron/jobs.py. Prevents
silent session loss if the process crashes or gets OOM-killed mid-write.

Resolved conflict: kept encoding='utf-8' from HEAD in the new fdopen call.

											
										
										
											2026-03-10 04:18:53 -07:00
+								            with os.fdopen(fd, "w", encoding="utf-8") as f:
-												fix(session): atomic write for sessions.json to prevent data loss on crash

											
										
										
											2026-03-07 20:54:45 +03:30
+								                json.dump(data, f, indent=2)
 								                f.flush()
 								                os.fsync(f.fileno())
 								            os.replace(tmp_path, sessions_file)
 								        except BaseException:
 								            try:
 								                os.unlink(tmp_path)
-												fix: replace silent exception swallowing with debug logging across tools

Add logger.debug() calls to 27 bare 'except: pass' blocks across 7 core
files, giving visibility into errors that were previously silently
swallowed. This makes it much easier to diagnose user-reported issues
from debug logs.

Files changed:
- tools/terminal_tool.py: 5 catches (stat, termios, fd close, cleanup)
- tools/delegate_tool.py: 7 catches + added logger (spinner, callbacks)
- tools/browser_tool.py: 5 catches (screenshot/recording cleanup, daemon kill)
- tools/code_execution_tool.py: 2 remaining catches (socket, server close)
- gateway/session.py: 2 catches (platform enum parse, temp file cleanup)
- agent/display.py: 2 catches + added logger (JSON parse in failure detect)
- agent/prompt_builder.py: 1 catch (skill description read)

Deliberately kept bare pass for:
- ImportError checks for optional dependencies (terminal_tool.py)
- SystemExit/KeyboardInterrupt handlers
- Spinner _write catch (would spam on every frame when stdout closed)
- process_registry PID-alive check (canonical os.kill(pid,0) pattern)

Extends the pattern from PR #686 (@aydnOktay).

											
										
										
											2026-03-10 06:59:20 -07:00
+								            except OSError as e:
 								                logger.debug("Could not remove temp file %s: %s", tmp_path, e)
-												fix(session): atomic write for sessions.json to prevent data loss on crash

											
										
										
											2026-03-07 20:54:45 +03:30
+								            raise
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								    def _generate_session_key(self, source: SessionSource) -> str:
 								        """Generate a session key from a source."""
-												fix(gateway): make group session isolation configurable

default group and channel sessions to per-user isolation, allow opting back into shared room sessions via config.yaml, and document Discord gateway routing and session behavior.

											
										
										
											2026-03-16 00:22:23 -07:00
+								        return build_session_key(
 								            source,
 								            group_sessions_per_user=getattr(self.config, "group_sessions_per_user", True),
-												feat: shared thread sessions by default — multi-user thread support (#5391)

Threads (Telegram forum topics, Discord threads, Slack threads) now default
to shared sessions where all participants see the same conversation. This is
the expected UX for threaded conversations where multiple users @mention the
bot and interact collaboratively.

Changes:
- build_session_key(): when thread_id is present, user_id is no longer
  appended to the session key (threads are shared by default)
- New config: thread_sessions_per_user (default: false) — opt-in to restore
  per-user isolation in threads if needed
- Sender attribution: messages in shared threads are prefixed with
  [sender name] so the agent can tell participants apart
- System prompt: shared threads show 'Multi-user thread' note instead of
  a per-turn User line (avoids busting prompt cache)
- Wired through all callers: gateway/run.py, base.py, telegram.py, feishu.py
- Regular group messages (no thread) remain per-user isolated (unchanged)
- DM threads are unaffected (they have their own keying logic)

Closes community request from demontut_ re: thread-based shared sessions.
											
										
										
											2026-04-05 19:46:58 -07:00
+								            thread_sessions_per_user=getattr(self.config, "thread_sessions_per_user", False),
-												fix(gateway): make group session isolation configurable

default group and channel sessions to per-user isolation, allow opting back into shared room sessions via config.yaml, and document Discord gateway routing and session behavior.

											
										
										
											2026-03-16 00:22:23 -07:00
+								        )
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								    def _is_session_expired(self, entry: SessionEntry) -> bool:
 								        """Check if a session has expired based on its reset policy.
 								        Works from the entry alone — no SessionSource needed.
 								        Used by the background expiry watcher to proactively flush memories.
 								        Sessions with active background processes are never considered expired.
 								        """
 								        if self._has_active_processes_fn:
 								            if self._has_active_processes_fn(entry.session_key):
 								                return False
 								        policy = self.config.get_reset_policy(
 								            platform=entry.platform,
 								            session_type=entry.chat_type,
 								        )
 								        if policy.mode == "none":
 								            return False
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								        now = _now()
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
 								        if policy.mode in ("idle", "both"):
 								            idle_deadline = entry.updated_at + timedelta(minutes=policy.idle_minutes)
 								            if now > idle_deadline:
 								                return True
 								        if policy.mode in ("daily", "both"):
 								            today_reset = now.replace(
 								                hour=policy.at_hour,
 								                minute=0, second=0, microsecond=0,
 								            )
 								            if now.hour < policy.at_hour:
 								                today_reset -= timedelta(days=1)
 								            if entry.updated_at < today_reset:
 								                return True
 								        return False
-												feat(gateway): notify users when session auto-resets (#2519)

When a session expires (daily schedule or idle timeout) and is
automatically reset, send a notification to the user explaining
what happened:

  ◐ Session automatically reset (inactive for 24h).
    Conversation history cleared.
  Use /resume to browse and restore a previous session.
  Adjust reset timing in config.yaml under session_reset.

Notifications are suppressed when:
- The expired session had no activity (no tokens used)
- The platform is excluded (api_server, webhook by default)
- notify: false in config

Changes:
- session.py: _should_reset() returns reason string ('idle'/'daily')
  instead of bool; SessionEntry gains auto_reset_reason and
  reset_had_activity fields; old entry's total_tokens checked
- config.py: SessionResetPolicy gains notify (bool, default: true)
  and notify_exclude_platforms (default: api_server, webhook)
- run.py: sends notification via adapter.send() before processing
  the user's message, with activity + platform checks
- 13 new tests

Config (config.yaml):

  session_reset:
    notify: true
    notify_exclude_platforms: [api_server, webhook]
											
										
										
											2026-03-22 09:33:39 -07:00
+								    def _should_reset(self, entry: SessionEntry, source: SessionSource) -> Optional[str]:
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        """
 								        Check if a session should be reset based on policy.
-												feat(gateway): notify users when session auto-resets (#2519)

When a session expires (daily schedule or idle timeout) and is
automatically reset, send a notification to the user explaining
what happened:

  ◐ Session automatically reset (inactive for 24h).
    Conversation history cleared.
  Use /resume to browse and restore a previous session.
  Adjust reset timing in config.yaml under session_reset.

Notifications are suppressed when:
- The expired session had no activity (no tokens used)
- The platform is excluded (api_server, webhook by default)
- notify: false in config

Changes:
- session.py: _should_reset() returns reason string ('idle'/'daily')
  instead of bool; SessionEntry gains auto_reset_reason and
  reset_had_activity fields; old entry's total_tokens checked
- config.py: SessionResetPolicy gains notify (bool, default: true)
  and notify_exclude_platforms (default: api_server, webhook)
- run.py: sends notification via adapter.send() before processing
  the user's message, with activity + platform checks
- 13 new tests

Config (config.yaml):

  session_reset:
    notify: true
    notify_exclude_platforms: [api_server, webhook]
											
										
										
											2026-03-22 09:33:39 -07:00
+								        Returns the reset reason ("idle" or "daily") if a reset is needed,
 								        or None if the session is still valid.
-												Add background process management with process tool, wait, PTY, and stdin support

New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).

Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL

Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response

Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)

Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform

RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop

Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview

											
										
										
											2026-02-17 02:51:31 -08:00
+								        Sessions with active background processes are never reset.
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        """
-												Add background process management with process tool, wait, PTY, and stdin support

New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).

Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL

Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response

Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)

Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform

RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop

Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview

											
										
										
											2026-02-17 02:51:31 -08:00
+								        if self._has_active_processes_fn:
 								            session_key = self._generate_session_key(source)
 								            if self._has_active_processes_fn(session_key):
-												feat(gateway): notify users when session auto-resets (#2519)

When a session expires (daily schedule or idle timeout) and is
automatically reset, send a notification to the user explaining
what happened:

  ◐ Session automatically reset (inactive for 24h).
    Conversation history cleared.
  Use /resume to browse and restore a previous session.
  Adjust reset timing in config.yaml under session_reset.

Notifications are suppressed when:
- The expired session had no activity (no tokens used)
- The platform is excluded (api_server, webhook by default)
- notify: false in config

Changes:
- session.py: _should_reset() returns reason string ('idle'/'daily')
  instead of bool; SessionEntry gains auto_reset_reason and
  reset_had_activity fields; old entry's total_tokens checked
- config.py: SessionResetPolicy gains notify (bool, default: true)
  and notify_exclude_platforms (default: api_server, webhook)
- run.py: sends notification via adapter.send() before processing
  the user's message, with activity + platform checks
- 13 new tests

Config (config.yaml):

  session_reset:
    notify: true
    notify_exclude_platforms: [api_server, webhook]
											
										
										
											2026-03-22 09:33:39 -07:00
+								                return None
-												Add background process management with process tool, wait, PTY, and stdin support

New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).

Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL

Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response

Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)

Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform

RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop

Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview

											
										
										
											2026-02-17 02:51:31 -08:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        policy = self.config.get_reset_policy(
 								            platform=source.platform,
 								            session_type=source.chat_type
 								        )
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
+								        if policy.mode == "none":
-												feat(gateway): notify users when session auto-resets (#2519)

When a session expires (daily schedule or idle timeout) and is
automatically reset, send a notification to the user explaining
what happened:

  ◐ Session automatically reset (inactive for 24h).
    Conversation history cleared.
  Use /resume to browse and restore a previous session.
  Adjust reset timing in config.yaml under session_reset.

Notifications are suppressed when:
- The expired session had no activity (no tokens used)
- The platform is excluded (api_server, webhook by default)
- notify: false in config

Changes:
- session.py: _should_reset() returns reason string ('idle'/'daily')
  instead of bool; SessionEntry gains auto_reset_reason and
  reset_had_activity fields; old entry's total_tokens checked
- config.py: SessionResetPolicy gains notify (bool, default: true)
  and notify_exclude_platforms (default: api_server, webhook)
- run.py: sends notification via adapter.send() before processing
  the user's message, with activity + platform checks
- 13 new tests

Config (config.yaml):

  session_reset:
    notify: true
    notify_exclude_platforms: [api_server, webhook]
											
										
										
											2026-03-22 09:33:39 -07:00
+								            return None
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								        now = _now()
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								        if policy.mode in ("idle", "both"):
 								            idle_deadline = entry.updated_at + timedelta(minutes=policy.idle_minutes)
 								            if now > idle_deadline:
-												feat(gateway): notify users when session auto-resets (#2519)

When a session expires (daily schedule or idle timeout) and is
automatically reset, send a notification to the user explaining
what happened:

  ◐ Session automatically reset (inactive for 24h).
    Conversation history cleared.
  Use /resume to browse and restore a previous session.
  Adjust reset timing in config.yaml under session_reset.

Notifications are suppressed when:
- The expired session had no activity (no tokens used)
- The platform is excluded (api_server, webhook by default)
- notify: false in config

Changes:
- session.py: _should_reset() returns reason string ('idle'/'daily')
  instead of bool; SessionEntry gains auto_reset_reason and
  reset_had_activity fields; old entry's total_tokens checked
- config.py: SessionResetPolicy gains notify (bool, default: true)
  and notify_exclude_platforms (default: api_server, webhook)
- run.py: sends notification via adapter.send() before processing
  the user's message, with activity + platform checks
- 13 new tests

Config (config.yaml):

  session_reset:
    notify: true
    notify_exclude_platforms: [api_server, webhook]
											
										
										
											2026-03-22 09:33:39 -07:00
+								                return "idle"
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								        if policy.mode in ("daily", "both"):
 								            today_reset = now.replace(
 								                hour=policy.at_hour,
 								                minute=0,
 								                second=0,
 								                microsecond=0
 								            )
 								            if now.hour < policy.at_hour:
 								                today_reset -= timedelta(days=1)
 								            if entry.updated_at < today_reset:
-												feat(gateway): notify users when session auto-resets (#2519)

When a session expires (daily schedule or idle timeout) and is
automatically reset, send a notification to the user explaining
what happened:

  ◐ Session automatically reset (inactive for 24h).
    Conversation history cleared.
  Use /resume to browse and restore a previous session.
  Adjust reset timing in config.yaml under session_reset.

Notifications are suppressed when:
- The expired session had no activity (no tokens used)
- The platform is excluded (api_server, webhook by default)
- notify: false in config

Changes:
- session.py: _should_reset() returns reason string ('idle'/'daily')
  instead of bool; SessionEntry gains auto_reset_reason and
  reset_had_activity fields; old entry's total_tokens checked
- config.py: SessionResetPolicy gains notify (bool, default: true)
  and notify_exclude_platforms (default: api_server, webhook)
- run.py: sends notification via adapter.send() before processing
  the user's message, with activity + platform checks
- 13 new tests

Config (config.yaml):

  session_reset:
    notify: true
    notify_exclude_platforms: [api_server, webhook]
											
										
										
											2026-03-22 09:33:39 -07:00
+								                return "daily"
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												feat(gateway): notify users when session auto-resets (#2519)

When a session expires (daily schedule or idle timeout) and is
automatically reset, send a notification to the user explaining
what happened:

  ◐ Session automatically reset (inactive for 24h).
    Conversation history cleared.
  Use /resume to browse and restore a previous session.
  Adjust reset timing in config.yaml under session_reset.

Notifications are suppressed when:
- The expired session had no activity (no tokens used)
- The platform is excluded (api_server, webhook by default)
- notify: false in config

Changes:
- session.py: _should_reset() returns reason string ('idle'/'daily')
  instead of bool; SessionEntry gains auto_reset_reason and
  reset_had_activity fields; old entry's total_tokens checked
- config.py: SessionResetPolicy gains notify (bool, default: true)
  and notify_exclude_platforms (default: api_server, webhook)
- run.py: sends notification via adapter.send() before processing
  the user's message, with activity + platform checks
- 13 new tests

Config (config.yaml):

  session_reset:
    notify: true
    notify_exclude_platforms: [api_server, webhook]
											
										
										
											2026-03-22 09:33:39 -07:00
+								        return None
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												feat: implement channel directory and message mirroring for cross-platform communication

- Introduced a new channel directory to cache reachable channels/contacts for messaging platforms, enhancing the send_message tool's ability to resolve human-friendly names to numeric IDs.
- Added functionality to mirror sent messages into the target's session transcript, providing context for cross-platform message delivery.
- Updated the send_message tool to support listing available targets and improved error handling for channel resolution.
- Enhanced the gateway to build and refresh the channel directory during startup and at regular intervals, ensuring up-to-date channel information.

											
										
										
											2026-02-22 20:44:15 -08:00
+								    def has_any_sessions(self) -> bool:
-												refactor: add exception handling and docstring to has_any_sessions

Wrap session_count() in try/except so a DB error falls through to
the heuristic fallback instead of crashing. Added a detailed
docstring explaining why the DB approach is needed and the > 1
assumption (current session already exists when called).

											
										
										
											2026-03-04 05:38:54 -08:00
+								        """Check if any sessions have ever been created (across all platforms).
 								        Uses the SQLite database as the source of truth because it preserves
 								        historical session records (ended sessions still count).  The in-memory
 								        ``_entries`` dict replaces entries on reset, so ``len(_entries)`` would
 								        stay at 1 for single-platform users — which is the bug this fixes.
 								        The current session is already in the DB by the time this is called
 								        (get_or_create_session runs first), so we check ``> 1``.
 								        """
-												fix(session): use database session count for has_any_sessions (#351)

The previous implementation used `len(self._entries) > 1` to check if any
sessions had ever been created. This failed for single-platform users because
when sessions reset (via /reset, auto-reset, or gateway restart), the entry
for the same session_key is replaced in _entries, not added. So len(_entries)
stays at 1 for users who only use one platform.

Fix: Query the SQLite database's session count instead. The database preserves
historical session records (marked as ended), so session_count() correctly
returns > 1 for returning users even after resets.

This prevents the agent from reintroducing itself to returning users after
every session reset.

Fixes #351

											
										
										
											2026-03-04 03:34:57 -05:00
+								        if self._db:
-												refactor: add exception handling and docstring to has_any_sessions

Wrap session_count() in try/except so a DB error falls through to
the heuristic fallback instead of crashing. Added a detailed
docstring explaining why the DB approach is needed and the > 1
assumption (current session already exists when called).

											
										
										
											2026-03-04 05:38:54 -08:00
+								            try:
 								                return self._db.session_count() > 1
 								            except Exception:
 								                pass  # fall through to heuristic
 								        # Fallback: check if sessions.json was loaded with existing data.
 								        # This covers the rare case where the DB is unavailable.
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								        with self._lock:
 								            self._ensure_loaded_locked()
 								            return len(self._entries) > 1
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    def get_or_create_session(
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								        self,
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        source: SessionSource,
 								        force_new: bool = False
 								    ) -> SessionEntry:
 								        """
 								        Get an existing session or create a new one.
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        Evaluates reset policy to determine if the existing session is stale.
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								        Creates a session record in SQLite when a new session starts.
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        """
 								        session_key = self._generate_session_key(source)
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								        now = _now()
 								        # SQLite calls are made outside the lock to avoid holding it during I/O.
 								        # All _entries / _loaded mutations are protected by self._lock.
 								        db_end_session_id = None
 								        db_create_kwargs = None
 								        with self._lock:
 								            self._ensure_loaded_locked()
 								            if session_key in self._entries and not force_new:
 								                entry = self._entries[session_key]
-												fix(gateway): break stuck session resume loops on restart (#7536)

Cherry-picked from PR #7747 with follow-up fixes:
- Narrowed suspend_all_active() to suspend_recently_active() — only
  suspends sessions updated within the last 2 minutes (likely in-flight),
  not all sessions which would unnecessarily reset idle users
- /stop with no running agent no longer suspends the session; only
  actual force-stops mark the session for reset

											
										
										
											2026-04-11 11:59:00 -07:00
+								                # Auto-reset sessions marked as suspended (e.g. after /stop
-												fix(gateway): auto-resume sessions after drain-timeout restart (#11852) (#12301)

The shutdown banner promised "send any message after restart to resume
where you left off" but the code did the opposite: a drain-timeout
restart skipped the .clean_shutdown marker, which made the next startup
call suspend_recently_active(), which marked the session suspended,
which made get_or_create_session() spawn a fresh session_id with a
'Session automatically reset. Use /resume...' notice — contradicting
the banner.

Introduce a resume_pending state on SessionEntry that is distinct from
suspended. Drain-timeout shutdown flags active sessions resume_pending
instead of letting startup-wide suspension destroy them. The next
message on the same session_key preserves the session_id, reloads the
transcript, and the agent receives a reason-aware restart-resume
system note that subsumes the existing tool-tail auto-continue note
(PR #9934).

Terminal escalation still flows through the existing
.restart_failure_counts stuck-loop counter (PR #7536, threshold 3) —
no parallel counter on SessionEntry. suspended still wins over
resume_pending in get_or_create_session() so genuinely stuck sessions
converge to a clean slate.

Spec: PR #11852 (BrennerSpear). Implementation follows the spec with
the approved correction (reuse .restart_failure_counts rather than
adding a resume_attempts field).

Changes:
- gateway/session.py: SessionEntry.resume_pending/resume_reason/
  last_resume_marked_at + to_dict/from_dict; SessionStore
  .mark_resume_pending()/clear_resume_pending(); get_or_create_session()
  returns existing entry when resume_pending (suspended still wins);
  suspend_recently_active() skips resume_pending entries.
- gateway/run.py: _stop_impl() drain-timeout branch marks active
  sessions resume_pending before _interrupt_running_agents();
  _run_agent() injects reason-aware restart-resume system note that
  subsumes the tool-tail case; successful-turn cleanup also clears
  resume_pending next to _clear_restart_failure_count();
  _notify_active_sessions_of_shutdown() softens the restart banner to
  'I'll try to resume where you left off' (honest about stuck-loop
  escalation).
- tests/gateway/test_restart_resume_pending.py: 29 new tests covering
  SessionEntry roundtrip, mark/clear helpers, get_or_create_session
  precedence (suspended > resume_pending), suspend_recently_active
  skip, drain-timeout mark reason (restart vs shutdown), system-note
  injection decision tree (including tool-tail subsumption), banner
  wording, and stuck-loop escalation override.
											
										
										
											2026-04-18 17:32:17 -07:00
+								                # broke a stuck loop — #7536).  ``suspended`` is the hard
 								                # forced-wipe signal and always wins over ``resume_pending``,
 								                # so repeated interrupted restarts that escalate via the
 								                # existing ``.restart_failure_counts`` stuck-loop counter
 								                # still converge to a clean slate.
-												fix(gateway): break stuck session resume loops on restart (#7536)

Cherry-picked from PR #7747 with follow-up fixes:
- Narrowed suspend_all_active() to suspend_recently_active() — only
  suspends sessions updated within the last 2 minutes (likely in-flight),
  not all sessions which would unnecessarily reset idle users
- /stop with no running agent no longer suspends the session; only
  actual force-stops mark the session for reset

											
										
										
											2026-04-11 11:59:00 -07:00
+								                if entry.suspended:
 								                    reset_reason = "suspended"
-												fix(gateway): auto-resume sessions after drain-timeout restart (#11852) (#12301)

The shutdown banner promised "send any message after restart to resume
where you left off" but the code did the opposite: a drain-timeout
restart skipped the .clean_shutdown marker, which made the next startup
call suspend_recently_active(), which marked the session suspended,
which made get_or_create_session() spawn a fresh session_id with a
'Session automatically reset. Use /resume...' notice — contradicting
the banner.

Introduce a resume_pending state on SessionEntry that is distinct from
suspended. Drain-timeout shutdown flags active sessions resume_pending
instead of letting startup-wide suspension destroy them. The next
message on the same session_key preserves the session_id, reloads the
transcript, and the agent receives a reason-aware restart-resume
system note that subsumes the existing tool-tail auto-continue note
(PR #9934).

Terminal escalation still flows through the existing
.restart_failure_counts stuck-loop counter (PR #7536, threshold 3) —
no parallel counter on SessionEntry. suspended still wins over
resume_pending in get_or_create_session() so genuinely stuck sessions
converge to a clean slate.

Spec: PR #11852 (BrennerSpear). Implementation follows the spec with
the approved correction (reuse .restart_failure_counts rather than
adding a resume_attempts field).

Changes:
- gateway/session.py: SessionEntry.resume_pending/resume_reason/
  last_resume_marked_at + to_dict/from_dict; SessionStore
  .mark_resume_pending()/clear_resume_pending(); get_or_create_session()
  returns existing entry when resume_pending (suspended still wins);
  suspend_recently_active() skips resume_pending entries.
- gateway/run.py: _stop_impl() drain-timeout branch marks active
  sessions resume_pending before _interrupt_running_agents();
  _run_agent() injects reason-aware restart-resume system note that
  subsumes the tool-tail case; successful-turn cleanup also clears
  resume_pending next to _clear_restart_failure_count();
  _notify_active_sessions_of_shutdown() softens the restart banner to
  'I'll try to resume where you left off' (honest about stuck-loop
  escalation).
- tests/gateway/test_restart_resume_pending.py: 29 new tests covering
  SessionEntry roundtrip, mark/clear helpers, get_or_create_session
  precedence (suspended > resume_pending), suspend_recently_active
  skip, drain-timeout mark reason (restart vs shutdown), system-note
  injection decision tree (including tool-tail subsumption), banner
  wording, and stuck-loop escalation override.
											
										
										
											2026-04-18 17:32:17 -07:00
+								                elif entry.resume_pending:
 								                    # Restart-interrupted session: preserve the session_id
 								                    # and return the existing entry so the transcript
 								                    # reloads intact.  ``resume_pending`` is cleared after
 								                    # the NEXT successful turn completes (not here), which
 								                    # means a re-interrupted retry keeps trying — the
 								                    # stuck-loop counter handles terminal escalation.
 								                    entry.updated_at = now
 								                    self._save()
 								                    return entry
-												fix(gateway): break stuck session resume loops on restart (#7536)

Cherry-picked from PR #7747 with follow-up fixes:
- Narrowed suspend_all_active() to suspend_recently_active() — only
  suspends sessions updated within the last 2 minutes (likely in-flight),
  not all sessions which would unnecessarily reset idle users
- /stop with no running agent no longer suspends the session; only
  actual force-stops mark the session for reset

											
										
										
											2026-04-11 11:59:00 -07:00
+								                else:
 								                    reset_reason = self._should_reset(entry, source)
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								                if not reset_reason:
 								                    entry.updated_at = now
 								                    self._save()
 								                    return entry
 								                else:
-												fix(gateway): persist memory flush state to prevent redundant re-flushes on restart (#4481)

* fix: force-close TCP sockets on client cleanup, detect and recover dead connections

When a provider drops connections mid-stream (e.g. OpenRouter outage),
httpx's graceful close leaves sockets in CLOSE-WAIT indefinitely. These
zombie connections accumulate and can prevent recovery without restarting.

Changes:
- _force_close_tcp_sockets: walks the httpx connection pool and issues
  socket.shutdown(SHUT_RDWR) + close() to force TCP RST on every socket
  when a client is closed, preventing CLOSE-WAIT accumulation
- _cleanup_dead_connections: probes the primary client's pool for dead
  sockets (recv MSG_PEEK), rebuilds the client if any are found
- Pre-turn health check at the start of each run_conversation call that
  auto-recovers with a user-facing status message
- Primary client rebuild after stale stream detection to purge pool
- User-facing messages on streaming connection failures:
  "Connection to provider dropped — Reconnecting (attempt 2/3)"
  "Connection failed after 3 attempts — try again in a moment"

Made-with: Cursor

* fix: pool entry missing base_url for openrouter, clean error messages

- _resolve_runtime_from_pool_entry: add OPENROUTER_BASE_URL fallback
  when pool entry has no runtime_base_url (pool entries from auth.json
  credential_pool often omit base_url)
- Replace Rich console.print for auth errors with plain print() to
  prevent ANSI escape code mangling through prompt_toolkit's stdout patch
- Force-close TCP sockets on client cleanup to prevent CLOSE-WAIT
  accumulation after provider outages
- Pre-turn dead connection detection with auto-recovery and user message
- Primary client rebuild after stale stream detection
- User-facing status messages on streaming connection failures/retries

Made-with: Cursor

* fix(gateway): persist memory flush state to prevent redundant re-flushes on restart

The _session_expiry_watcher tracked flushed sessions in an in-memory set
(_pre_flushed_sessions) that was lost on gateway restart. Expired sessions
remained in sessions.json and were re-discovered every restart, causing
redundant AIAgent runs that burned API credits and blocked the event loop.

Fix: Add a memory_flushed boolean field to SessionEntry, persisted in
sessions.json. The watcher sets it after a successful flush. On restart,
the flag survives and the watcher skips already-flushed sessions.

- Add memory_flushed field to SessionEntry with to_dict/from_dict support
- Old sessions.json entries without the field default to False (backward compat)
- Remove the ephemeral _pre_flushed_sessions set from SessionStore
- Update tests: save/load roundtrip, legacy entry compat, auto-reset behavior
											
										
										
											2026-04-01 12:05:02 -07:00
+								                    # Session is being auto-reset.
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								                    was_auto_reset = True
 								                    auto_reset_reason = reset_reason
 								                    # Track whether the expired session had any real conversation
 								                    reset_had_activity = entry.total_tokens > 0
 								                    db_end_session_id = entry.session_id
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								            else:
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								                was_auto_reset = False
 								                auto_reset_reason = None
 								                reset_had_activity = False
 								            # Create new session
 								            session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
 								            entry = SessionEntry(
 								                session_key=session_key,
 								                session_id=session_id,
 								                created_at=now,
 								                updated_at=now,
 								                origin=source,
 								                display_name=source.chat_name,
 								                platform=source.platform,
 								                chat_type=source.chat_type,
 								                was_auto_reset=was_auto_reset,
 								                auto_reset_reason=auto_reset_reason,
 								                reset_had_activity=reset_had_activity,
 								            )
 								            self._entries[session_key] = entry
 								            self._save()
 								            db_create_kwargs = {
 								                "session_id": session_id,
 								                "source": source.platform.value,
 								                "user_id": source.user_id,
 								            }
 								        # SQLite operations outside the lock
 								        if self._db and db_end_session_id:
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								            try:
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								                self._db.end_session(db_end_session_id, "session_reset")
 								            except Exception as e:
 								                logger.debug("Session DB operation failed: %s", e)
 								        if self._db and db_create_kwargs:
 								            try:
 								                self._db.create_session(**db_create_kwargs)
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								            except Exception as e:
 								                print(f"[gateway] Warning: Failed to create SQLite session: {e}")
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        return entry
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    def update_session(
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								        self,
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        session_key: str,
-												fix: integration hardening for gateway token tracking

Follow-up to 58dbd81 — ensures smooth transition for existing users:

- Backward compat: old session files without last_prompt_tokens
  default to 0 via data.get('last_prompt_tokens', 0)
- /compress, /undo, /retry: reset last_prompt_tokens to 0 after
  rewriting transcripts (stale token counts would under-report)
- Auto-compression hygiene: reset last_prompt_tokens after rewriting
- update_session: use None sentinel (not 0) as default so callers
  can explicitly reset to 0 while normal calls don't clobber
- 6 new tests covering: default value, serialization roundtrip,
  old-format migration, set/reset/no-change semantics
- /reset: new SessionEntry naturally gets last_prompt_tokens=0

2942 tests pass.

											
										
										
											2026-03-10 23:40:24 -07:00
+								        last_prompt_tokens: int = None,
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    ) -> None:
-												fix(insights): persist token usage for non-CLI sessions

											
										
										
											2026-04-02 11:15:16 +05:30
+								        """Update lightweight session metadata after an interaction."""
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								        with self._lock:
 								            self._ensure_loaded_locked()
 								            if session_key in self._entries:
 								                entry = self._entries[session_key]
 								                entry.updated_at = _now()
 								                if last_prompt_tokens is not None:
 								                    entry.last_prompt_tokens = last_prompt_tokens
 								                self._save()
-												fix(gateway): break stuck session resume loops on restart (#7536)

Cherry-picked from PR #7747 with follow-up fixes:
- Narrowed suspend_all_active() to suspend_recently_active() — only
  suspends sessions updated within the last 2 minutes (likely in-flight),
  not all sessions which would unnecessarily reset idle users
- /stop with no running agent no longer suspends the session; only
  actual force-stops mark the session for reset

											
										
										
											2026-04-11 11:59:00 -07:00
+								    def suspend_session(self, session_key: str) -> bool:
 								        """Mark a session as suspended so it auto-resets on next access.
 								        Used by ``/stop`` to prevent stuck sessions from being resumed
 								        after a gateway restart (#7536).  Returns True if the session
 								        existed and was marked.
 								        """
 								        with self._lock:
 								            self._ensure_loaded_locked()
 								            if session_key in self._entries:
 								                self._entries[session_key].suspended = True
 								                self._save()
 								                return True
 								        return False
-												fix(gateway): auto-resume sessions after drain-timeout restart (#11852) (#12301)

The shutdown banner promised "send any message after restart to resume
where you left off" but the code did the opposite: a drain-timeout
restart skipped the .clean_shutdown marker, which made the next startup
call suspend_recently_active(), which marked the session suspended,
which made get_or_create_session() spawn a fresh session_id with a
'Session automatically reset. Use /resume...' notice — contradicting
the banner.

Introduce a resume_pending state on SessionEntry that is distinct from
suspended. Drain-timeout shutdown flags active sessions resume_pending
instead of letting startup-wide suspension destroy them. The next
message on the same session_key preserves the session_id, reloads the
transcript, and the agent receives a reason-aware restart-resume
system note that subsumes the existing tool-tail auto-continue note
(PR #9934).

Terminal escalation still flows through the existing
.restart_failure_counts stuck-loop counter (PR #7536, threshold 3) —
no parallel counter on SessionEntry. suspended still wins over
resume_pending in get_or_create_session() so genuinely stuck sessions
converge to a clean slate.

Spec: PR #11852 (BrennerSpear). Implementation follows the spec with
the approved correction (reuse .restart_failure_counts rather than
adding a resume_attempts field).

Changes:
- gateway/session.py: SessionEntry.resume_pending/resume_reason/
  last_resume_marked_at + to_dict/from_dict; SessionStore
  .mark_resume_pending()/clear_resume_pending(); get_or_create_session()
  returns existing entry when resume_pending (suspended still wins);
  suspend_recently_active() skips resume_pending entries.
- gateway/run.py: _stop_impl() drain-timeout branch marks active
  sessions resume_pending before _interrupt_running_agents();
  _run_agent() injects reason-aware restart-resume system note that
  subsumes the tool-tail case; successful-turn cleanup also clears
  resume_pending next to _clear_restart_failure_count();
  _notify_active_sessions_of_shutdown() softens the restart banner to
  'I'll try to resume where you left off' (honest about stuck-loop
  escalation).
- tests/gateway/test_restart_resume_pending.py: 29 new tests covering
  SessionEntry roundtrip, mark/clear helpers, get_or_create_session
  precedence (suspended > resume_pending), suspend_recently_active
  skip, drain-timeout mark reason (restart vs shutdown), system-note
  injection decision tree (including tool-tail subsumption), banner
  wording, and stuck-loop escalation override.
											
										
										
											2026-04-18 17:32:17 -07:00
+								    def mark_resume_pending(
 								        self,
 								        session_key: str,
 								        reason: str = "restart_timeout",
 								    ) -> bool:
 								        """Mark a session as resumable after a restart interruption.
 								        Unlike ``suspend_session()``, this preserves the existing
 								        ``session_id`` and the transcript.  The next call to
 								        ``get_or_create_session()`` for this key returns the same entry
 								        so the user auto-resumes on the same conversation lane.
 								        Returns True if the session existed and was marked.
 								        """
 								        with self._lock:
 								            self._ensure_loaded_locked()
 								            if session_key in self._entries:
 								                entry = self._entries[session_key]
 								                # Never override an explicit ``suspended`` — that is a hard
 								                # forced-wipe signal (from /stop or stuck-loop escalation).
 								                if entry.suspended:
 								                    return False
 								                entry.resume_pending = True
 								                entry.resume_reason = reason
 								                entry.last_resume_marked_at = _now()
 								                self._save()
 								                return True
 								        return False
 								    def clear_resume_pending(self, session_key: str) -> bool:
 								        """Clear the resume-pending flag after a successful resumed turn.
 								        Called from the gateway after ``run_conversation()`` returns a
 								        final response for a session that had ``resume_pending=True``,
 								        signalling that recovery succeeded.
 								        Returns True if a flag was cleared.
 								        """
 								        with self._lock:
 								            self._ensure_loaded_locked()
 								            entry = self._entries.get(session_key)
 								            if entry is None or not entry.resume_pending:
 								                return False
 								            entry.resume_pending = False
 								            entry.resume_reason = None
 								            entry.last_resume_marked_at = None
 								            self._save()
 								            return True
-												fix(gateway): prune stale SessionStore entries to bound memory + disk (#11789)

SessionStore._entries grew unbounded.  Every unique
(platform, chat_id, thread_id, user_id) tuple ever seen was kept in
RAM and rewritten to sessions.json on every message.  A Discord bot
in 100 servers x 100 channels x ~100 rotating users accumulates on
the order of 10^5 entries after a few months; each sessions.json
write becomes an O(n) fsync.  Nothing trimmed this — there was no
TTL, no cap, no eviction path.

Changes
-------
* SessionStore.prune_old_entries(max_age_days) — drops entries whose
  updated_at is older than the cutoff.  Preserves:
    - suspended entries (user paused them via /stop for later resume)
    - entries with an active background process attached
  Pruning is functionally identical to a natural reset-policy expiry:
  SQLite transcript stays, session_key -> session_id mapping dropped,
  returning user gets a fresh session.

* GatewayConfig.session_store_max_age_days (default 90; 0 disables).
  Serialized in to_dict/from_dict, coerced from bad types / negatives
  to safe defaults.  No migration needed — missing field -> 90 days.

* _session_expiry_watcher calls prune_old_entries once per hour
  (first tick is immediate).  Uses the existing watcher loop so no
  new background task is created.

Why not more aggressive
-----------------------
90 days is long enough that legitimate long-idle users (seasonal,
vacation, etc.) aren't surprised — pruning just means they get a
fresh session on return, same outcome they'd get from any other
reset-policy trigger.  Admins can lower it via config; 0 disables.

Tests
-----
tests/gateway/test_session_store_prune.py — 17 cases covering:
  * entry age based on updated_at, not created_at
  * max_age_days=0 disables; negative coerces to 0
  * suspended + active-process entries are skipped
  * _save fires iff something was removed
  * disk JSON reflects post-prune state
  * thread safety against concurrent readers
  * config field roundtrips + graceful fallback on bad values
  * watcher gate logic (first tick prunes, subsequent within 1h don't)

119 broader session/gateway tests remain green.
											
										
										
											2026-04-17 13:48:49 -07:00
+								    def prune_old_entries(self, max_age_days: int) -> int:
 								        """Drop SessionEntry records older than max_age_days.
 								        Pruning is based on ``updated_at`` (last activity), not ``created_at``.
 								        A session that's been active within the window is kept regardless of
 								        how old it is.  Entries marked ``suspended`` are kept — the user
 								        explicitly paused them for later resume.  Entries held by an active
 								        process (via has_active_processes_fn) are also kept so long-running
 								        background work isn't orphaned.
 								        Pruning is functionally identical to a natural reset-policy expiry:
 								        the transcript in SQLite stays, but the session_key → session_id
 								        mapping is dropped and the user starts a fresh session on return.
 								        ``max_age_days <= 0`` disables pruning; returns 0 immediately.
 								        Returns the number of entries removed.
 								        """
 								        if max_age_days is None or max_age_days <= 0:
 								            return 0
 								        from datetime import timedelta
 								        cutoff = _now() - timedelta(days=max_age_days)
 								        removed_keys: list[str] = []
 								        with self._lock:
 								            self._ensure_loaded_locked()
 								            for key, entry in list(self._entries.items()):
 								                if entry.suspended:
 								                    continue
 								                # Never prune sessions with an active background process
 								                # attached — the user may still be waiting on output.
-												fix(gateway): pass session_key (not session_id) to active-process check during prune

SessionStore.prune_old_entries was calling
self._has_active_processes_fn(entry.session_id) but the callback wired
up in gateway/run.py is process_registry.has_active_for_session, which
compares against session_key, not session_id. Every other caller in
session.py (_is_session_expired, _should_reset) already passes
session_key, so prune was the only outlier — and because session_id and
session_key live in different namespaces, the guard never fired.

Result in production: sessions with live background processes (queued
cron output, detached agents, long-running Bash) were pruned out of
_entries despite the docstring promising they'd be preserved. When the
process finished and tried to deliver output, the session_key to
session_id mapping was gone and the work was effectively orphaned.

Also update the existing test_prune_skips_entries_with_active_processes,
which was checking the wrong interface (its mock callback took session_id
so it agreed with the buggy implementation). The test now uses a
session_key-based mock, matching the production callback's real contract,
and a new regression guard test pins the behaviour.

Swallowed exceptions inside the prune loop now log at debug level instead
of silently disappearing.

											
										
										
											2026-04-18 13:18:36 +01:00
+								                # The callback is keyed by session_key (see process_registry.
 								                # has_active_for_session); passing session_id here used to
 								                # never match, so active sessions got pruned anyway.
-												fix(gateway): prune stale SessionStore entries to bound memory + disk (#11789)

SessionStore._entries grew unbounded.  Every unique
(platform, chat_id, thread_id, user_id) tuple ever seen was kept in
RAM and rewritten to sessions.json on every message.  A Discord bot
in 100 servers x 100 channels x ~100 rotating users accumulates on
the order of 10^5 entries after a few months; each sessions.json
write becomes an O(n) fsync.  Nothing trimmed this — there was no
TTL, no cap, no eviction path.

Changes
-------
* SessionStore.prune_old_entries(max_age_days) — drops entries whose
  updated_at is older than the cutoff.  Preserves:
    - suspended entries (user paused them via /stop for later resume)
    - entries with an active background process attached
  Pruning is functionally identical to a natural reset-policy expiry:
  SQLite transcript stays, session_key -> session_id mapping dropped,
  returning user gets a fresh session.

* GatewayConfig.session_store_max_age_days (default 90; 0 disables).
  Serialized in to_dict/from_dict, coerced from bad types / negatives
  to safe defaults.  No migration needed — missing field -> 90 days.

* _session_expiry_watcher calls prune_old_entries once per hour
  (first tick is immediate).  Uses the existing watcher loop so no
  new background task is created.

Why not more aggressive
-----------------------
90 days is long enough that legitimate long-idle users (seasonal,
vacation, etc.) aren't surprised — pruning just means they get a
fresh session on return, same outcome they'd get from any other
reset-policy trigger.  Admins can lower it via config; 0 disables.

Tests
-----
tests/gateway/test_session_store_prune.py — 17 cases covering:
  * entry age based on updated_at, not created_at
  * max_age_days=0 disables; negative coerces to 0
  * suspended + active-process entries are skipped
  * _save fires iff something was removed
  * disk JSON reflects post-prune state
  * thread safety against concurrent readers
  * config field roundtrips + graceful fallback on bad values
  * watcher gate logic (first tick prunes, subsequent within 1h don't)

119 broader session/gateway tests remain green.
											
										
										
											2026-04-17 13:48:49 -07:00
+								                if self._has_active_processes_fn is not None:
 								                    try:
-												fix(gateway): pass session_key (not session_id) to active-process check during prune

SessionStore.prune_old_entries was calling
self._has_active_processes_fn(entry.session_id) but the callback wired
up in gateway/run.py is process_registry.has_active_for_session, which
compares against session_key, not session_id. Every other caller in
session.py (_is_session_expired, _should_reset) already passes
session_key, so prune was the only outlier — and because session_id and
session_key live in different namespaces, the guard never fired.

Result in production: sessions with live background processes (queued
cron output, detached agents, long-running Bash) were pruned out of
_entries despite the docstring promising they'd be preserved. When the
process finished and tried to deliver output, the session_key to
session_id mapping was gone and the work was effectively orphaned.

Also update the existing test_prune_skips_entries_with_active_processes,
which was checking the wrong interface (its mock callback took session_id
so it agreed with the buggy implementation). The test now uses a
session_key-based mock, matching the production callback's real contract,
and a new regression guard test pins the behaviour.

Swallowed exceptions inside the prune loop now log at debug level instead
of silently disappearing.

											
										
										
											2026-04-18 13:18:36 +01:00
+								                        if self._has_active_processes_fn(entry.session_key):
-												fix(gateway): prune stale SessionStore entries to bound memory + disk (#11789)

SessionStore._entries grew unbounded.  Every unique
(platform, chat_id, thread_id, user_id) tuple ever seen was kept in
RAM and rewritten to sessions.json on every message.  A Discord bot
in 100 servers x 100 channels x ~100 rotating users accumulates on
the order of 10^5 entries after a few months; each sessions.json
write becomes an O(n) fsync.  Nothing trimmed this — there was no
TTL, no cap, no eviction path.

Changes
-------
* SessionStore.prune_old_entries(max_age_days) — drops entries whose
  updated_at is older than the cutoff.  Preserves:
    - suspended entries (user paused them via /stop for later resume)
    - entries with an active background process attached
  Pruning is functionally identical to a natural reset-policy expiry:
  SQLite transcript stays, session_key -> session_id mapping dropped,
  returning user gets a fresh session.

* GatewayConfig.session_store_max_age_days (default 90; 0 disables).
  Serialized in to_dict/from_dict, coerced from bad types / negatives
  to safe defaults.  No migration needed — missing field -> 90 days.

* _session_expiry_watcher calls prune_old_entries once per hour
  (first tick is immediate).  Uses the existing watcher loop so no
  new background task is created.

Why not more aggressive
-----------------------
90 days is long enough that legitimate long-idle users (seasonal,
vacation, etc.) aren't surprised — pruning just means they get a
fresh session on return, same outcome they'd get from any other
reset-policy trigger.  Admins can lower it via config; 0 disables.

Tests
-----
tests/gateway/test_session_store_prune.py — 17 cases covering:
  * entry age based on updated_at, not created_at
  * max_age_days=0 disables; negative coerces to 0
  * suspended + active-process entries are skipped
  * _save fires iff something was removed
  * disk JSON reflects post-prune state
  * thread safety against concurrent readers
  * config field roundtrips + graceful fallback on bad values
  * watcher gate logic (first tick prunes, subsequent within 1h don't)

119 broader session/gateway tests remain green.
											
										
										
											2026-04-17 13:48:49 -07:00
+								                            continue
-												fix(gateway): pass session_key (not session_id) to active-process check during prune

SessionStore.prune_old_entries was calling
self._has_active_processes_fn(entry.session_id) but the callback wired
up in gateway/run.py is process_registry.has_active_for_session, which
compares against session_key, not session_id. Every other caller in
session.py (_is_session_expired, _should_reset) already passes
session_key, so prune was the only outlier — and because session_id and
session_key live in different namespaces, the guard never fired.

Result in production: sessions with live background processes (queued
cron output, detached agents, long-running Bash) were pruned out of
_entries despite the docstring promising they'd be preserved. When the
process finished and tried to deliver output, the session_key to
session_id mapping was gone and the work was effectively orphaned.

Also update the existing test_prune_skips_entries_with_active_processes,
which was checking the wrong interface (its mock callback took session_id
so it agreed with the buggy implementation). The test now uses a
session_key-based mock, matching the production callback's real contract,
and a new regression guard test pins the behaviour.

Swallowed exceptions inside the prune loop now log at debug level instead
of silently disappearing.

											
										
										
											2026-04-18 13:18:36 +01:00
+								                    except Exception as exc:
 								                        logger.debug(
 								                            "has_active_processes_fn raised during prune for %s: %s",
 								                            entry.session_key, exc,
 								                        )
-												fix(gateway): prune stale SessionStore entries to bound memory + disk (#11789)

SessionStore._entries grew unbounded.  Every unique
(platform, chat_id, thread_id, user_id) tuple ever seen was kept in
RAM and rewritten to sessions.json on every message.  A Discord bot
in 100 servers x 100 channels x ~100 rotating users accumulates on
the order of 10^5 entries after a few months; each sessions.json
write becomes an O(n) fsync.  Nothing trimmed this — there was no
TTL, no cap, no eviction path.

Changes
-------
* SessionStore.prune_old_entries(max_age_days) — drops entries whose
  updated_at is older than the cutoff.  Preserves:
    - suspended entries (user paused them via /stop for later resume)
    - entries with an active background process attached
  Pruning is functionally identical to a natural reset-policy expiry:
  SQLite transcript stays, session_key -> session_id mapping dropped,
  returning user gets a fresh session.

* GatewayConfig.session_store_max_age_days (default 90; 0 disables).
  Serialized in to_dict/from_dict, coerced from bad types / negatives
  to safe defaults.  No migration needed — missing field -> 90 days.

* _session_expiry_watcher calls prune_old_entries once per hour
  (first tick is immediate).  Uses the existing watcher loop so no
  new background task is created.

Why not more aggressive
-----------------------
90 days is long enough that legitimate long-idle users (seasonal,
vacation, etc.) aren't surprised — pruning just means they get a
fresh session on return, same outcome they'd get from any other
reset-policy trigger.  Admins can lower it via config; 0 disables.

Tests
-----
tests/gateway/test_session_store_prune.py — 17 cases covering:
  * entry age based on updated_at, not created_at
  * max_age_days=0 disables; negative coerces to 0
  * suspended + active-process entries are skipped
  * _save fires iff something was removed
  * disk JSON reflects post-prune state
  * thread safety against concurrent readers
  * config field roundtrips + graceful fallback on bad values
  * watcher gate logic (first tick prunes, subsequent within 1h don't)

119 broader session/gateway tests remain green.
											
										
										
											2026-04-17 13:48:49 -07:00
+								                if entry.updated_at < cutoff:
 								                    removed_keys.append(key)
 								            for key in removed_keys:
 								                self._entries.pop(key, None)
 								            if removed_keys:
 								                self._save()
 								        if removed_keys:
 								            logger.info(
 								                "SessionStore pruned %d entries older than %d days",
 								                len(removed_keys), max_age_days,
 								            )
 								        return len(removed_keys)
-												fix(gateway): break stuck session resume loops on restart (#7536)

Cherry-picked from PR #7747 with follow-up fixes:
- Narrowed suspend_all_active() to suspend_recently_active() — only
  suspends sessions updated within the last 2 minutes (likely in-flight),
  not all sessions which would unnecessarily reset idle users
- /stop with no running agent no longer suspends the session; only
  actual force-stops mark the session for reset

											
										
										
											2026-04-11 11:59:00 -07:00
+								    def suspend_recently_active(self, max_age_seconds: int = 120) -> int:
 								        """Mark recently-active sessions as suspended.
 								        Called on gateway startup to prevent sessions that were likely
 								        in-flight when the gateway last exited from being blindly resumed
 								        (#7536).  Only suspends sessions updated within *max_age_seconds*
 								        to avoid resetting long-idle sessions that are harmless to resume.
 								        Returns the number of sessions that were suspended.
-												fix(gateway): auto-resume sessions after drain-timeout restart (#11852) (#12301)

The shutdown banner promised "send any message after restart to resume
where you left off" but the code did the opposite: a drain-timeout
restart skipped the .clean_shutdown marker, which made the next startup
call suspend_recently_active(), which marked the session suspended,
which made get_or_create_session() spawn a fresh session_id with a
'Session automatically reset. Use /resume...' notice — contradicting
the banner.

Introduce a resume_pending state on SessionEntry that is distinct from
suspended. Drain-timeout shutdown flags active sessions resume_pending
instead of letting startup-wide suspension destroy them. The next
message on the same session_key preserves the session_id, reloads the
transcript, and the agent receives a reason-aware restart-resume
system note that subsumes the existing tool-tail auto-continue note
(PR #9934).

Terminal escalation still flows through the existing
.restart_failure_counts stuck-loop counter (PR #7536, threshold 3) —
no parallel counter on SessionEntry. suspended still wins over
resume_pending in get_or_create_session() so genuinely stuck sessions
converge to a clean slate.

Spec: PR #11852 (BrennerSpear). Implementation follows the spec with
the approved correction (reuse .restart_failure_counts rather than
adding a resume_attempts field).

Changes:
- gateway/session.py: SessionEntry.resume_pending/resume_reason/
  last_resume_marked_at + to_dict/from_dict; SessionStore
  .mark_resume_pending()/clear_resume_pending(); get_or_create_session()
  returns existing entry when resume_pending (suspended still wins);
  suspend_recently_active() skips resume_pending entries.
- gateway/run.py: _stop_impl() drain-timeout branch marks active
  sessions resume_pending before _interrupt_running_agents();
  _run_agent() injects reason-aware restart-resume system note that
  subsumes the tool-tail case; successful-turn cleanup also clears
  resume_pending next to _clear_restart_failure_count();
  _notify_active_sessions_of_shutdown() softens the restart banner to
  'I'll try to resume where you left off' (honest about stuck-loop
  escalation).
- tests/gateway/test_restart_resume_pending.py: 29 new tests covering
  SessionEntry roundtrip, mark/clear helpers, get_or_create_session
  precedence (suspended > resume_pending), suspend_recently_active
  skip, drain-timeout mark reason (restart vs shutdown), system-note
  injection decision tree (including tool-tail subsumption), banner
  wording, and stuck-loop escalation override.
											
										
										
											2026-04-18 17:32:17 -07:00
 								        Entries flagged ``resume_pending=True`` are skipped — those were
 								        marked intentionally by the drain-timeout path as recoverable.
 								        Terminal escalation for genuinely stuck ``resume_pending`` sessions
 								        is handled by the existing ``.restart_failure_counts`` stuck-loop
 								        counter, which runs after this method on startup.
-												fix(gateway): break stuck session resume loops on restart (#7536)

Cherry-picked from PR #7747 with follow-up fixes:
- Narrowed suspend_all_active() to suspend_recently_active() — only
  suspends sessions updated within the last 2 minutes (likely in-flight),
  not all sessions which would unnecessarily reset idle users
- /stop with no running agent no longer suspends the session; only
  actual force-stops mark the session for reset

											
										
										
											2026-04-11 11:59:00 -07:00
+								        """
-												fix(matrix): replace pickle crypto store with SQLite, fix E2EE decryption (#7981)

Fixes #7952 — Matrix E2EE completely broken after mautrix migration.

- Replace MemoryCryptoStore + pickle/HMAC persistence with mautrix's
  PgCryptoStore backed by SQLite via aiosqlite. Crypto state now
  persists reliably across restarts without fragile serialization.

- Add handle_sync() call on initial sync response so to-device events
  (queued Megolm key shares) are dispatched to OlmMachine instead of
  being silently dropped.

- Add _verify_device_keys_on_server() after loading crypto state.
  Detects missing keys (re-uploads), stale keys from migration
  (attempts re-upload), and corrupted state (refuses E2EE).

- Add _CryptoStateStore adapter wrapping MemoryStateStore to satisfy
  mautrix crypto's StateStore interface (is_encrypted,
  get_encryption_info, find_shared_rooms).

- Remove redundant share_keys() call from sync loop — OlmMachine
  already handles this via DEVICE_OTK_COUNT event handler.

- Fix datetime vs float TypeError in session.py suspend_recently_active()
  that crashed gateway startup.

- Add aiosqlite and asyncpg to [matrix] extra in pyproject.toml.

- Update test mocks for PgCryptoStore/Database and add query_keys mock
  for key verification. 174 tests pass.

- Add E2EE upgrade/migration docs to Matrix user guide.
											
										
										
											2026-04-11 18:54:46 -07:00
+								        from datetime import timedelta
-												fix(gateway): break stuck session resume loops on restart (#7536)

Cherry-picked from PR #7747 with follow-up fixes:
- Narrowed suspend_all_active() to suspend_recently_active() — only
  suspends sessions updated within the last 2 minutes (likely in-flight),
  not all sessions which would unnecessarily reset idle users
- /stop with no running agent no longer suspends the session; only
  actual force-stops mark the session for reset

											
										
										
											2026-04-11 11:59:00 -07:00
-												fix(matrix): replace pickle crypto store with SQLite, fix E2EE decryption (#7981)

Fixes #7952 — Matrix E2EE completely broken after mautrix migration.

- Replace MemoryCryptoStore + pickle/HMAC persistence with mautrix's
  PgCryptoStore backed by SQLite via aiosqlite. Crypto state now
  persists reliably across restarts without fragile serialization.

- Add handle_sync() call on initial sync response so to-device events
  (queued Megolm key shares) are dispatched to OlmMachine instead of
  being silently dropped.

- Add _verify_device_keys_on_server() after loading crypto state.
  Detects missing keys (re-uploads), stale keys from migration
  (attempts re-upload), and corrupted state (refuses E2EE).

- Add _CryptoStateStore adapter wrapping MemoryStateStore to satisfy
  mautrix crypto's StateStore interface (is_encrypted,
  get_encryption_info, find_shared_rooms).

- Remove redundant share_keys() call from sync loop — OlmMachine
  already handles this via DEVICE_OTK_COUNT event handler.

- Fix datetime vs float TypeError in session.py suspend_recently_active()
  that crashed gateway startup.

- Add aiosqlite and asyncpg to [matrix] extra in pyproject.toml.

- Update test mocks for PgCryptoStore/Database and add query_keys mock
  for key verification. 174 tests pass.

- Add E2EE upgrade/migration docs to Matrix user guide.
											
										
										
											2026-04-11 18:54:46 -07:00
+								        cutoff = _now() - timedelta(seconds=max_age_seconds)
-												fix(gateway): break stuck session resume loops on restart (#7536)

Cherry-picked from PR #7747 with follow-up fixes:
- Narrowed suspend_all_active() to suspend_recently_active() — only
  suspends sessions updated within the last 2 minutes (likely in-flight),
  not all sessions which would unnecessarily reset idle users
- /stop with no running agent no longer suspends the session; only
  actual force-stops mark the session for reset

											
										
										
											2026-04-11 11:59:00 -07:00
+								        count = 0
 								        with self._lock:
 								            self._ensure_loaded_locked()
 								            for entry in self._entries.values():
-												fix(gateway): auto-resume sessions after drain-timeout restart (#11852) (#12301)

The shutdown banner promised "send any message after restart to resume
where you left off" but the code did the opposite: a drain-timeout
restart skipped the .clean_shutdown marker, which made the next startup
call suspend_recently_active(), which marked the session suspended,
which made get_or_create_session() spawn a fresh session_id with a
'Session automatically reset. Use /resume...' notice — contradicting
the banner.

Introduce a resume_pending state on SessionEntry that is distinct from
suspended. Drain-timeout shutdown flags active sessions resume_pending
instead of letting startup-wide suspension destroy them. The next
message on the same session_key preserves the session_id, reloads the
transcript, and the agent receives a reason-aware restart-resume
system note that subsumes the existing tool-tail auto-continue note
(PR #9934).

Terminal escalation still flows through the existing
.restart_failure_counts stuck-loop counter (PR #7536, threshold 3) —
no parallel counter on SessionEntry. suspended still wins over
resume_pending in get_or_create_session() so genuinely stuck sessions
converge to a clean slate.

Spec: PR #11852 (BrennerSpear). Implementation follows the spec with
the approved correction (reuse .restart_failure_counts rather than
adding a resume_attempts field).

Changes:
- gateway/session.py: SessionEntry.resume_pending/resume_reason/
  last_resume_marked_at + to_dict/from_dict; SessionStore
  .mark_resume_pending()/clear_resume_pending(); get_or_create_session()
  returns existing entry when resume_pending (suspended still wins);
  suspend_recently_active() skips resume_pending entries.
- gateway/run.py: _stop_impl() drain-timeout branch marks active
  sessions resume_pending before _interrupt_running_agents();
  _run_agent() injects reason-aware restart-resume system note that
  subsumes the tool-tail case; successful-turn cleanup also clears
  resume_pending next to _clear_restart_failure_count();
  _notify_active_sessions_of_shutdown() softens the restart banner to
  'I'll try to resume where you left off' (honest about stuck-loop
  escalation).
- tests/gateway/test_restart_resume_pending.py: 29 new tests covering
  SessionEntry roundtrip, mark/clear helpers, get_or_create_session
  precedence (suspended > resume_pending), suspend_recently_active
  skip, drain-timeout mark reason (restart vs shutdown), system-note
  injection decision tree (including tool-tail subsumption), banner
  wording, and stuck-loop escalation override.
											
										
										
											2026-04-18 17:32:17 -07:00
+								                if entry.resume_pending:
 								                    continue
-												fix(gateway): break stuck session resume loops on restart (#7536)

Cherry-picked from PR #7747 with follow-up fixes:
- Narrowed suspend_all_active() to suspend_recently_active() — only
  suspends sessions updated within the last 2 minutes (likely in-flight),
  not all sessions which would unnecessarily reset idle users
- /stop with no running agent no longer suspends the session; only
  actual force-stops mark the session for reset

											
										
										
											2026-04-11 11:59:00 -07:00
+								                if not entry.suspended and entry.updated_at >= cutoff:
 								                    entry.suspended = True
 								                    count += 1
 								            if count:
 								                self._save()
 								        return count
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    def reset_session(self, session_key: str) -> Optional[SessionEntry]:
 								        """Force reset a session, creating a new session ID."""
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								        db_end_session_id = None
 								        db_create_kwargs = None
 								        new_entry = None
 								        with self._lock:
 								            self._ensure_loaded_locked()
 								            if session_key not in self._entries:
 								                return None
 								            old_entry = self._entries[session_key]
 								            db_end_session_id = old_entry.session_id
 								            now = _now()
 								            session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
 								            new_entry = SessionEntry(
 								                session_key=session_key,
 								                session_id=session_id,
 								                created_at=now,
 								                updated_at=now,
 								                origin=old_entry.origin,
 								                display_name=old_entry.display_name,
 								                platform=old_entry.platform,
 								                chat_type=old_entry.chat_type,
 								            )
 								            self._entries[session_key] = new_entry
 								            self._save()
 								            db_create_kwargs = {
 								                "session_id": session_id,
 								                "source": old_entry.platform.value if old_entry.platform else "unknown",
 								                "user_id": old_entry.origin.user_id if old_entry.origin else None,
 								            }
 								        if self._db and db_end_session_id:
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								            try:
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								                self._db.end_session(db_end_session_id, "session_reset")
-												refactor: enhance error handling with structured logging across multiple modules

- Updated various modules including cli.py, run_agent.py, gateway, and tools to replace silent exception handling with structured logging.
- Improved error messages to provide more context, aiding in debugging and monitoring.
- Ensured consistent logging practices throughout the codebase, enhancing traceability and maintainability.

											
										
										
											2026-02-21 03:32:11 -08:00
+								            except Exception as e:
 								                logger.debug("Session DB operation failed: %s", e)
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
 								        if self._db and db_create_kwargs:
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								            try:
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								                self._db.create_session(**db_create_kwargs)
-												refactor: enhance error handling with structured logging across multiple modules

- Updated various modules including cli.py, run_agent.py, gateway, and tools to replace silent exception handling with structured logging.
- Improved error messages to provide more context, aiding in debugging and monitoring.
- Ensured consistent logging practices throughout the codebase, enhancing traceability and maintainability.

											
										
										
											2026-02-21 03:32:11 -08:00
+								            except Exception as e:
 								                logger.debug("Session DB operation failed: %s", e)
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        return new_entry
-												feat: add /resume command to gateway for switching to named sessions

Messaging users can now switch back to previously-named sessions:
- /resume My Project  — resolves the title (with auto-lineage) and
  restores that session's conversation history
- /resume (no args)   — lists recent titled sessions to choose from

Adds SessionStore.switch_session() which ends the current session and
points the session entry at the target session ID so the old transcript
is loaded on the next message. Running agents are cleared on switch.

Completes the session naming feature from PR #720 for gateway users.

8 new tests covering: name resolution, lineage auto-latest, already-on-
session check, nonexistent names, agent cleanup, no-DB fallback, and
listing titled sessions.

											
										
										
											2026-03-08 17:09:00 -07:00
 								    def switch_session(self, session_key: str, target_session_id: str) -> Optional[SessionEntry]:
 								        """Switch a session key to point at an existing session ID.
 								        Used by ``/resume`` to restore a previously-named session.
 								        Ends the current session in SQLite (like reset), but instead of
 								        generating a fresh session ID, re-uses ``target_session_id`` so the
-												fix: reopen resumed gateway sessions in sqlite

											
										
										
											2026-04-13 17:50:42 +08:00
+								        old transcript is loaded on the next message. If the target session was
 								        previously ended, re-open it so gateway resume semantics match the CLI.
-												feat: add /resume command to gateway for switching to named sessions

Messaging users can now switch back to previously-named sessions:
- /resume My Project  — resolves the title (with auto-lineage) and
  restores that session's conversation history
- /resume (no args)   — lists recent titled sessions to choose from

Adds SessionStore.switch_session() which ends the current session and
points the session entry at the target session ID so the old transcript
is loaded on the next message. Running agents are cleared on switch.

Completes the session naming feature from PR #720 for gateway users.

8 new tests covering: name resolution, lineage auto-latest, already-on-
session check, nonexistent names, agent cleanup, no-DB fallback, and
listing titled sessions.

											
										
										
											2026-03-08 17:09:00 -07:00
+								        """
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								        db_end_session_id = None
 								        new_entry = None
-												feat: add /resume command to gateway for switching to named sessions

Messaging users can now switch back to previously-named sessions:
- /resume My Project  — resolves the title (with auto-lineage) and
  restores that session's conversation history
- /resume (no args)   — lists recent titled sessions to choose from

Adds SessionStore.switch_session() which ends the current session and
points the session entry at the target session ID so the old transcript
is loaded on the next message. Running agents are cleared on switch.

Completes the session naming feature from PR #720 for gateway users.

8 new tests covering: name resolution, lineage auto-latest, already-on-
session check, nonexistent names, agent cleanup, no-DB fallback, and
listing titled sessions.

											
										
										
											2026-03-08 17:09:00 -07:00
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								        with self._lock:
 								            self._ensure_loaded_locked()
-												feat: add /resume command to gateway for switching to named sessions

Messaging users can now switch back to previously-named sessions:
- /resume My Project  — resolves the title (with auto-lineage) and
  restores that session's conversation history
- /resume (no args)   — lists recent titled sessions to choose from

Adds SessionStore.switch_session() which ends the current session and
points the session entry at the target session ID so the old transcript
is loaded on the next message. Running agents are cleared on switch.

Completes the session naming feature from PR #720 for gateway users.

8 new tests covering: name resolution, lineage auto-latest, already-on-
session check, nonexistent names, agent cleanup, no-DB fallback, and
listing titled sessions.

											
										
										
											2026-03-08 17:09:00 -07:00
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								            if session_key not in self._entries:
 								                return None
-												feat: add /resume command to gateway for switching to named sessions

Messaging users can now switch back to previously-named sessions:
- /resume My Project  — resolves the title (with auto-lineage) and
  restores that session's conversation history
- /resume (no args)   — lists recent titled sessions to choose from

Adds SessionStore.switch_session() which ends the current session and
points the session entry at the target session ID so the old transcript
is loaded on the next message. Running agents are cleared on switch.

Completes the session naming feature from PR #720 for gateway users.

8 new tests covering: name resolution, lineage auto-latest, already-on-
session check, nonexistent names, agent cleanup, no-DB fallback, and
listing titled sessions.

											
										
										
											2026-03-08 17:09:00 -07:00
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								            old_entry = self._entries[session_key]
-												feat: add /resume command to gateway for switching to named sessions

Messaging users can now switch back to previously-named sessions:
- /resume My Project  — resolves the title (with auto-lineage) and
  restores that session's conversation history
- /resume (no args)   — lists recent titled sessions to choose from

Adds SessionStore.switch_session() which ends the current session and
points the session entry at the target session ID so the old transcript
is loaded on the next message. Running agents are cleared on switch.

Completes the session naming feature from PR #720 for gateway users.

8 new tests covering: name resolution, lineage auto-latest, already-on-
session check, nonexistent names, agent cleanup, no-DB fallback, and
listing titled sessions.

											
										
										
											2026-03-08 17:09:00 -07:00
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								            # Don't switch if already on that session
 								            if old_entry.session_id == target_session_id:
 								                return old_entry
 								            db_end_session_id = old_entry.session_id
 								            now = _now()
 								            new_entry = SessionEntry(
 								                session_key=session_key,
 								                session_id=target_session_id,
 								                created_at=now,
 								                updated_at=now,
 								                origin=old_entry.origin,
 								                display_name=old_entry.display_name,
 								                platform=old_entry.platform,
 								                chat_type=old_entry.chat_type,
 								            )
 								            self._entries[session_key] = new_entry
 								            self._save()
 								        if self._db and db_end_session_id:
-												feat: add /resume command to gateway for switching to named sessions

Messaging users can now switch back to previously-named sessions:
- /resume My Project  — resolves the title (with auto-lineage) and
  restores that session's conversation history
- /resume (no args)   — lists recent titled sessions to choose from

Adds SessionStore.switch_session() which ends the current session and
points the session entry at the target session ID so the old transcript
is loaded on the next message. Running agents are cleared on switch.

Completes the session naming feature from PR #720 for gateway users.

8 new tests covering: name resolution, lineage auto-latest, already-on-
session check, nonexistent names, agent cleanup, no-DB fallback, and
listing titled sessions.

											
										
										
											2026-03-08 17:09:00 -07:00
+								            try:
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								                self._db.end_session(db_end_session_id, "session_switch")
-												feat: add /resume command to gateway for switching to named sessions

Messaging users can now switch back to previously-named sessions:
- /resume My Project  — resolves the title (with auto-lineage) and
  restores that session's conversation history
- /resume (no args)   — lists recent titled sessions to choose from

Adds SessionStore.switch_session() which ends the current session and
points the session entry at the target session ID so the old transcript
is loaded on the next message. Running agents are cleared on switch.

Completes the session naming feature from PR #720 for gateway users.

8 new tests covering: name resolution, lineage auto-latest, already-on-
session check, nonexistent names, agent cleanup, no-DB fallback, and
listing titled sessions.

											
										
										
											2026-03-08 17:09:00 -07:00
+								            except Exception as e:
 								                logger.debug("Session DB end_session failed: %s", e)
-												fix: reopen resumed gateway sessions in sqlite

											
										
										
											2026-04-13 17:50:42 +08:00
+								        if self._db:
 								            try:
 								                self._db.reopen_session(target_session_id)
 								            except Exception as e:
 								                logger.debug("Session DB reopen_session failed: %s", e)
-												feat: add /resume command to gateway for switching to named sessions

Messaging users can now switch back to previously-named sessions:
- /resume My Project  — resolves the title (with auto-lineage) and
  restores that session's conversation history
- /resume (no args)   — lists recent titled sessions to choose from

Adds SessionStore.switch_session() which ends the current session and
points the session entry at the target session ID so the old transcript
is loaded on the next message. Running agents are cleared on switch.

Completes the session naming feature from PR #720 for gateway users.

8 new tests covering: name resolution, lineage auto-latest, already-on-
session check, nonexistent names, agent cleanup, no-DB fallback, and
listing titled sessions.

											
										
										
											2026-03-08 17:09:00 -07:00
+								        return new_entry
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    def list_sessions(self, active_minutes: Optional[int] = None) -> List[SessionEntry]:
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								        """List all sessions, optionally filtered by activity."""
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								        with self._lock:
 								            self._ensure_loaded_locked()
 								            entries = list(self._entries.values())
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        if active_minutes is not None:
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								            cutoff = _now() - timedelta(minutes=active_minutes)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            entries = [e for e in entries if e.updated_at >= cutoff]
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        entries.sort(key=lambda e: e.updated_at, reverse=True)
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        return entries
 								    def get_transcript_path(self, session_id: str) -> Path:
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								        """Get the path to a session's legacy transcript file."""
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        return self.sessions_dir / f"{session_id}.jsonl"
-												fix: eliminate 3x SQLite message duplication in gateway sessions (#860)

Three separate code paths all wrote to the same SQLite state.db with
no deduplication, inflating session transcripts by 3-4x:

1. _log_msg_to_db() — wrote each message individually after append
2. _flush_messages_to_session_db() — re-wrote ALL new messages at
   every _persist_session() call (~18 exit points), with no tracking
   of what was already written
3. gateway append_to_transcript() — wrote everything a third time
   after the agent returned

Since load_transcript() prefers SQLite over JSONL, the inflated data
was loaded on every session resume, causing proportional token waste.

Fix:
- Remove _log_msg_to_db() and all 16 call sites (redundant with flush)
- Add _last_flushed_db_idx tracking in _flush_messages_to_session_db()
  so repeated _persist_session() calls only write truly new messages
- Reset flush cursor on compression (new session ID)
- Add skip_db parameter to SessionStore.append_to_transcript() so the
  gateway skips SQLite writes when the agent already persisted them
- Gateway now passes skip_db=True for agent-managed messages, still
  writes to JSONL as backup

Verified: a 12-message CLI session with tool calls produces exactly
12 SQLite rows with zero duplicates (previously would be 36-48).

Tests: 9 new tests covering flush deduplication, skip_db behavior,
compression reset, and initialization. Full suite passes (2869 tests).

											
										
										
											2026-03-10 15:22:44 -07:00
+								    def append_to_transcript(self, session_id: str, message: Dict[str, Any], skip_db: bool = False) -> None:
 								        """Append a message to a session's transcript (SQLite + legacy JSONL).
 								        Args:
 								            skip_db: When True, only write to JSONL and skip the SQLite write.
 								                     Used when the agent already persisted messages to SQLite
 								                     via its own _flush_messages_to_session_db(), preventing
 								                     the duplicate-write bug (#860).
 								        """
 								        # Write to SQLite (unless the agent already handled it)
 								        if self._db and not skip_db:
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								            try:
 								                self._db.append_message(
 								                    session_id=session_id,
-												fix(gateway): thread-safe SessionStore — protect _entries with threading.Lock (#3052)

SessionStore._entries was read and mutated without synchronisation,
causing race conditions when multiple platforms (Telegram + Discord)
received messages concurrently on the same gateway process. Two threads
could simultaneously pass the session_key check and create duplicate
sessions for the same user, splitting conversation history.

- Added threading.Lock to protect all _entries / _loaded mutations
- Split _ensure_loaded() into public wrapper + internal _ensure_loaded_locked()
- SQLite I/O is performed outside the lock to avoid blocking during
  slow disk operations
- _save() stays inside the lock since it reads _entries for serialization

Cherry-picked from PR #3012 by Kewe63. Removed unrelated changes
(delivery.py case-sensitivity, hermes_state.py schema tracking) and
stripped the UTC timezone switch to keep the change focused on threading.

Co-authored-by: Kewe63 <Kewe63@users.noreply.github.com>
											
										
										
											2026-03-25 15:15:37 -07:00
+								                    role=message.get("role", "unknown"),
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								                    content=message.get("content"),
 								                    tool_name=message.get("tool_name"),
 								                    tool_calls=message.get("tool_calls"),
 								                    tool_call_id=message.get("tool_call_id"),
-												fix: preserve reasoning_content on Kimi replay

											
										
										
											2026-04-21 23:40:31 -06:00
+								                    reasoning=message.get("reasoning") if message.get("role") == "assistant" else None,
 								                    reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None,
 								                    reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
 								                    codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
-												fix(agent): preserve Codex message items for replay

											
										
										
											2026-04-25 23:14:12 +03:00
+								                    codex_message_items=message.get("codex_message_items") if message.get("role") == "assistant" else None,
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								                )
-												refactor: enhance error handling with structured logging across multiple modules

- Updated various modules including cli.py, run_agent.py, gateway, and tools to replace silent exception handling with structured logging.
- Improved error messages to provide more context, aiding in debugging and monitoring.
- Ensured consistent logging practices throughout the codebase, enhancing traceability and maintainability.

											
										
										
											2026-02-21 03:32:11 -08:00
+								            except Exception as e:
 								                logger.debug("Session DB operation failed: %s", e)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								        # Also write legacy JSONL (keeps existing tooling working during transition)
 								        transcript_path = self.get_transcript_path(session_id)
-												fix(gateway): add missing UTF-8 encoding to file I/O preventing crashes on Windows

On Windows, Python's open() defaults to the system locale encoding
(e.g. cp1254 for Turkish, cp1252 for Western European) instead of
UTF-8. The gateway already uses ensure_ascii=False in json.dumps()
to preserve Unicode characters in chat messages, but the
corresponding open() calls lack encoding="utf-8". This mismatch
causes UnicodeEncodeError / UnicodeDecodeError when users send
non-ASCII messages (Turkish, Japanese, Arabic, emoji, etc.) through
Telegram, Discord, WhatsApp, or Slack on Windows.

The project already fixed this for .env files in hermes_cli/config.py
(line 624) but the gateway module was missed.

Files fixed:
- gateway/session.py: session index + JSONL transcript read/write (5 calls)
- gateway/channel_directory.py: channel directory read/write (3 calls)
- gateway/mirror.py: session index read + transcript append (2 calls)

											
										
										
											2026-03-04 11:32:57 +03:00
+								        with open(transcript_path, "a", encoding="utf-8") as f:
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            f.write(json.dumps(message, ensure_ascii=False) + "\n")
-												fix: /retry, /undo, /compress, and /reset gateway commands (#210)

- /retry, /undo, /compress were setting a non-existent conversation_history
  attribute on SessionEntry (a @dataclass with no such field). The dangling
  attribute was silently created but never read — transcript was reloaded
  from DB on next interaction, making all three commands no-ops.

- /reset accessed self.session_store._sessions (non-existent) instead of
  self.session_store._entries, causing AttributeError caught by a bare
  except, silently skipping the pre-reset memory flush.

Fix:
- Add SessionDB.clear_messages() to delete messages and reset counters
- Add SessionStore.rewrite_transcript() to atomically replace transcript
  in both SQLite and legacy JSONL storage
- Replace all dangling attr assignments with rewrite_transcript() calls
- Fix _sessions → _entries in /reset handler

Closes #210

											
										
										
											2026-03-02 00:14:49 -08:00
+								    def rewrite_transcript(self, session_id: str, messages: List[Dict[str, Any]]) -> None:
 								        """Replace the entire transcript for a session with new messages.
 								        Used by /retry, /undo, and /compress to persist modified conversation history.
 								        Rewrites both SQLite and legacy JSONL storage.
 								        """
 								        # SQLite: clear old messages and re-insert
 								        if self._db:
 								            try:
 								                self._db.clear_messages(session_id)
 								                for msg in messages:
-												fix(session): preserve reasoning fields in rewrite_transcript (#3311)

rewrite_transcript (used by /retry, /undo, /compress) was calling
append_message without reasoning, reasoning_details, or
codex_reasoning_items — permanently dropping them from SQLite.

Co-authored-by: alireza78a <alireza78.crypto@gmail.com>
											
										
										
											2026-03-26 18:18:00 -07:00
+								                    role = msg.get("role", "unknown")
-												fix: /retry, /undo, /compress, and /reset gateway commands (#210)

- /retry, /undo, /compress were setting a non-existent conversation_history
  attribute on SessionEntry (a @dataclass with no such field). The dangling
  attribute was silently created but never read — transcript was reloaded
  from DB on next interaction, making all three commands no-ops.

- /reset accessed self.session_store._sessions (non-existent) instead of
  self.session_store._entries, causing AttributeError caught by a bare
  except, silently skipping the pre-reset memory flush.

Fix:
- Add SessionDB.clear_messages() to delete messages and reset counters
- Add SessionStore.rewrite_transcript() to atomically replace transcript
  in both SQLite and legacy JSONL storage
- Replace all dangling attr assignments with rewrite_transcript() calls
- Fix _sessions → _entries in /reset handler

Closes #210

											
										
										
											2026-03-02 00:14:49 -08:00
+								                    self._db.append_message(
 								                        session_id=session_id,
-												fix(session): preserve reasoning fields in rewrite_transcript (#3311)

rewrite_transcript (used by /retry, /undo, /compress) was calling
append_message without reasoning, reasoning_details, or
codex_reasoning_items — permanently dropping them from SQLite.

Co-authored-by: alireza78a <alireza78.crypto@gmail.com>
											
										
										
											2026-03-26 18:18:00 -07:00
+								                        role=role,
-												fix: /retry, /undo, /compress, and /reset gateway commands (#210)

- /retry, /undo, /compress were setting a non-existent conversation_history
  attribute on SessionEntry (a @dataclass with no such field). The dangling
  attribute was silently created but never read — transcript was reloaded
  from DB on next interaction, making all three commands no-ops.

- /reset accessed self.session_store._sessions (non-existent) instead of
  self.session_store._entries, causing AttributeError caught by a bare
  except, silently skipping the pre-reset memory flush.

Fix:
- Add SessionDB.clear_messages() to delete messages and reset counters
- Add SessionStore.rewrite_transcript() to atomically replace transcript
  in both SQLite and legacy JSONL storage
- Replace all dangling attr assignments with rewrite_transcript() calls
- Fix _sessions → _entries in /reset handler

Closes #210

											
										
										
											2026-03-02 00:14:49 -08:00
+								                        content=msg.get("content"),
 								                        tool_name=msg.get("tool_name"),
 								                        tool_calls=msg.get("tool_calls"),
 								                        tool_call_id=msg.get("tool_call_id"),
-												fix(session): preserve reasoning fields in rewrite_transcript (#3311)

rewrite_transcript (used by /retry, /undo, /compress) was calling
append_message without reasoning, reasoning_details, or
codex_reasoning_items — permanently dropping them from SQLite.

Co-authored-by: alireza78a <alireza78.crypto@gmail.com>
											
										
										
											2026-03-26 18:18:00 -07:00
+								                        reasoning=msg.get("reasoning") if role == "assistant" else None,
-												fix: preserve reasoning_content on Kimi replay

											
										
										
											2026-04-21 23:40:31 -06:00
+								                        reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
-												fix(session): preserve reasoning fields in rewrite_transcript (#3311)

rewrite_transcript (used by /retry, /undo, /compress) was calling
append_message without reasoning, reasoning_details, or
codex_reasoning_items — permanently dropping them from SQLite.

Co-authored-by: alireza78a <alireza78.crypto@gmail.com>
											
										
										
											2026-03-26 18:18:00 -07:00
+								                        reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
 								                        codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
-												fix(agent): preserve Codex message items for replay

											
										
										
											2026-04-25 23:14:12 +03:00
+								                        codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
-												fix: /retry, /undo, /compress, and /reset gateway commands (#210)

- /retry, /undo, /compress were setting a non-existent conversation_history
  attribute on SessionEntry (a @dataclass with no such field). The dangling
  attribute was silently created but never read — transcript was reloaded
  from DB on next interaction, making all three commands no-ops.

- /reset accessed self.session_store._sessions (non-existent) instead of
  self.session_store._entries, causing AttributeError caught by a bare
  except, silently skipping the pre-reset memory flush.

Fix:
- Add SessionDB.clear_messages() to delete messages and reset counters
- Add SessionStore.rewrite_transcript() to atomically replace transcript
  in both SQLite and legacy JSONL storage
- Replace all dangling attr assignments with rewrite_transcript() calls
- Fix _sessions → _entries in /reset handler

Closes #210

											
										
										
											2026-03-02 00:14:49 -08:00
+								                    )
 								            except Exception as e:
 								                logger.debug("Failed to rewrite transcript in DB: %s", e)
 								        # JSONL: overwrite the file
 								        transcript_path = self.get_transcript_path(session_id)
-												fix(gateway): add missing UTF-8 encoding to file I/O preventing crashes on Windows

On Windows, Python's open() defaults to the system locale encoding
(e.g. cp1254 for Turkish, cp1252 for Western European) instead of
UTF-8. The gateway already uses ensure_ascii=False in json.dumps()
to preserve Unicode characters in chat messages, but the
corresponding open() calls lack encoding="utf-8". This mismatch
causes UnicodeEncodeError / UnicodeDecodeError when users send
non-ASCII messages (Turkish, Japanese, Arabic, emoji, etc.) through
Telegram, Discord, WhatsApp, or Slack on Windows.

The project already fixed this for .env files in hermes_cli/config.py
(line 624) but the gateway module was missed.

Files fixed:
- gateway/session.py: session index + JSONL transcript read/write (5 calls)
- gateway/channel_directory.py: channel directory read/write (3 calls)
- gateway/mirror.py: session index read + transcript append (2 calls)

											
										
										
											2026-03-04 11:32:57 +03:00
+								        with open(transcript_path, "w", encoding="utf-8") as f:
-												fix: /retry, /undo, /compress, and /reset gateway commands (#210)

- /retry, /undo, /compress were setting a non-existent conversation_history
  attribute on SessionEntry (a @dataclass with no such field). The dangling
  attribute was silently created but never read — transcript was reloaded
  from DB on next interaction, making all three commands no-ops.

- /reset accessed self.session_store._sessions (non-existent) instead of
  self.session_store._entries, causing AttributeError caught by a bare
  except, silently skipping the pre-reset memory flush.

Fix:
- Add SessionDB.clear_messages() to delete messages and reset counters
- Add SessionStore.rewrite_transcript() to atomically replace transcript
  in both SQLite and legacy JSONL storage
- Replace all dangling attr assignments with rewrite_transcript() calls
- Fix _sessions → _entries in /reset handler

Closes #210

											
										
										
											2026-03-02 00:14:49 -08:00
+								            for msg in messages:
 								                f.write(json.dumps(msg, ensure_ascii=False) + "\n")
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    def load_transcript(self, session_id: str) -> List[Dict[str, Any]]:
 								        """Load all messages from a session's transcript."""
-												fix(state): SQLite concurrency hardening + session transcript integrity (#3249)

* fix(session-db): survive CLI/gateway concurrent write contention

Closes #3139

Three layered fixes for the scenario where CLI and gateway write to
state.db concurrently, causing create_session() to fail with
'database is locked' and permanently disabling session_search on the
gateway side.

1. Increase SQLite connection timeout: 10s -> 30s
   hermes_state.py: longer window for the WAL writer to finish a batch
   flush before the other process gives up entirely.

2. INSERT OR IGNORE in create_session
   hermes_state.py: prevents IntegrityError on duplicate session IDs
   (e.g. gateway restarts while CLI session is still alive).

3. Don't null out _session_db on create_session failure  (main fix)
   run_agent.py: a transient lock at agent startup must not permanently
   disable session_search for the lifetime of that agent instance.
   _session_db now stays alive so subsequent flushes and searches work
   once the lock clears.

4. New ensure_session() helper + call it during flush
   hermes_state.py: INSERT OR IGNORE for a minimal session row.
   run_agent.py _flush_messages_to_session_db: calls ensure_session()
   before appending messages, so the FK constraint is satisfied even
   when create_session() failed at startup. No-op when the row exists.

* fix(state): release lock between context queries in search_messages

The context-window queries (one per FTS5 match) were running inside
the same lock acquisition as the primary FTS5 query, holding the lock
for O(N) sequential SQLite round-trips. Move per-match context fetches
outside the outer lock block so each acquires the lock independently,
keeping critical sections short and allowing other threads to interleave.

* fix(session): prefer longer source in load_transcript to prevent legacy truncation

When a long-lived session pre-dates SQLite storage (e.g. sessions
created before the DB layer was introduced, or after a clean
deployment that reset the DB), _flush_messages_to_session_db only
writes the *new* messages from the current turn to SQLite — it skips
messages already present in conversation_history, assuming they are
already persisted.

That assumption fails for legacy JSONL-only sessions:

  Turn N (first after DB migration):
    load_transcript(id)       → SQLite: 0  → falls back to JSONL: 994 ✓
    _flush_messages_to_session_db: skip first 994, write 2 new → SQLite: 2

  Turn N+1:
    load_transcript(id)       → SQLite: 2  → returns immediately ✗
    Agent sees 2 messages of history instead of 996

The same pattern causes the reported symptom: session JSON truncated
to 4 messages (_save_session_log writes agent.messages which only has
2 history + 2 new = 4).

Fix: always load both sources and return whichever is longer.  For a
fully-migrated session SQLite will always be ≥ JSONL, so there is no
regression.  For a legacy session that hasn't been bootstrapped yet,
JSONL wins and the full history is restored.

Closes #3212

* test: add load_transcript source preference tests for #3212

Covers: JSONL longer returns JSONL, SQLite longer returns SQLite,
SQLite empty falls back to JSONL, both empty returns empty, equal
length prefers SQLite (richer reasoning fields).

---------

Co-authored-by: Mibayy <mibayy@hermes.ai>
Co-authored-by: kewe63 <kewe.3217@gmail.com>
Co-authored-by: Mibayy <mibayy@users.noreply.github.com>
											
										
										
											2026-03-26 13:47:14 -07:00
+								        db_messages = []
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								        # Try SQLite first
 								        if self._db:
 								            try:
-												fix(state): SQLite concurrency hardening + session transcript integrity (#3249)

* fix(session-db): survive CLI/gateway concurrent write contention

Closes #3139

Three layered fixes for the scenario where CLI and gateway write to
state.db concurrently, causing create_session() to fail with
'database is locked' and permanently disabling session_search on the
gateway side.

1. Increase SQLite connection timeout: 10s -> 30s
   hermes_state.py: longer window for the WAL writer to finish a batch
   flush before the other process gives up entirely.

2. INSERT OR IGNORE in create_session
   hermes_state.py: prevents IntegrityError on duplicate session IDs
   (e.g. gateway restarts while CLI session is still alive).

3. Don't null out _session_db on create_session failure  (main fix)
   run_agent.py: a transient lock at agent startup must not permanently
   disable session_search for the lifetime of that agent instance.
   _session_db now stays alive so subsequent flushes and searches work
   once the lock clears.

4. New ensure_session() helper + call it during flush
   hermes_state.py: INSERT OR IGNORE for a minimal session row.
   run_agent.py _flush_messages_to_session_db: calls ensure_session()
   before appending messages, so the FK constraint is satisfied even
   when create_session() failed at startup. No-op when the row exists.

* fix(state): release lock between context queries in search_messages

The context-window queries (one per FTS5 match) were running inside
the same lock acquisition as the primary FTS5 query, holding the lock
for O(N) sequential SQLite round-trips. Move per-match context fetches
outside the outer lock block so each acquires the lock independently,
keeping critical sections short and allowing other threads to interleave.

* fix(session): prefer longer source in load_transcript to prevent legacy truncation

When a long-lived session pre-dates SQLite storage (e.g. sessions
created before the DB layer was introduced, or after a clean
deployment that reset the DB), _flush_messages_to_session_db only
writes the *new* messages from the current turn to SQLite — it skips
messages already present in conversation_history, assuming they are
already persisted.

That assumption fails for legacy JSONL-only sessions:

  Turn N (first after DB migration):
    load_transcript(id)       → SQLite: 0  → falls back to JSONL: 994 ✓
    _flush_messages_to_session_db: skip first 994, write 2 new → SQLite: 2

  Turn N+1:
    load_transcript(id)       → SQLite: 2  → returns immediately ✗
    Agent sees 2 messages of history instead of 996

The same pattern causes the reported symptom: session JSON truncated
to 4 messages (_save_session_log writes agent.messages which only has
2 history + 2 new = 4).

Fix: always load both sources and return whichever is longer.  For a
fully-migrated session SQLite will always be ≥ JSONL, so there is no
regression.  For a legacy session that hasn't been bootstrapped yet,
JSONL wins and the full history is restored.

Closes #3212

* test: add load_transcript source preference tests for #3212

Covers: JSONL longer returns JSONL, SQLite longer returns SQLite,
SQLite empty falls back to JSONL, both empty returns empty, equal
length prefers SQLite (richer reasoning fields).

---------

Co-authored-by: Mibayy <mibayy@hermes.ai>
Co-authored-by: kewe63 <kewe.3217@gmail.com>
Co-authored-by: Mibayy <mibayy@users.noreply.github.com>
											
										
										
											2026-03-26 13:47:14 -07:00
+								                db_messages = self._db.get_messages_as_conversation(session_id)
-												refactor: enhance error handling with structured logging across multiple modules

- Updated various modules including cli.py, run_agent.py, gateway, and tools to replace silent exception handling with structured logging.
- Improved error messages to provide more context, aiding in debugging and monitoring.
- Ensured consistent logging practices throughout the codebase, enhancing traceability and maintainability.

											
										
										
											2026-02-21 03:32:11 -08:00
+								            except Exception as e:
 								                logger.debug("Could not load messages from DB: %s", e)
-												fix(state): SQLite concurrency hardening + session transcript integrity (#3249)

* fix(session-db): survive CLI/gateway concurrent write contention

Closes #3139

Three layered fixes for the scenario where CLI and gateway write to
state.db concurrently, causing create_session() to fail with
'database is locked' and permanently disabling session_search on the
gateway side.

1. Increase SQLite connection timeout: 10s -> 30s
   hermes_state.py: longer window for the WAL writer to finish a batch
   flush before the other process gives up entirely.

2. INSERT OR IGNORE in create_session
   hermes_state.py: prevents IntegrityError on duplicate session IDs
   (e.g. gateway restarts while CLI session is still alive).

3. Don't null out _session_db on create_session failure  (main fix)
   run_agent.py: a transient lock at agent startup must not permanently
   disable session_search for the lifetime of that agent instance.
   _session_db now stays alive so subsequent flushes and searches work
   once the lock clears.

4. New ensure_session() helper + call it during flush
   hermes_state.py: INSERT OR IGNORE for a minimal session row.
   run_agent.py _flush_messages_to_session_db: calls ensure_session()
   before appending messages, so the FK constraint is satisfied even
   when create_session() failed at startup. No-op when the row exists.

* fix(state): release lock between context queries in search_messages

The context-window queries (one per FTS5 match) were running inside
the same lock acquisition as the primary FTS5 query, holding the lock
for O(N) sequential SQLite round-trips. Move per-match context fetches
outside the outer lock block so each acquires the lock independently,
keeping critical sections short and allowing other threads to interleave.

* fix(session): prefer longer source in load_transcript to prevent legacy truncation

When a long-lived session pre-dates SQLite storage (e.g. sessions
created before the DB layer was introduced, or after a clean
deployment that reset the DB), _flush_messages_to_session_db only
writes the *new* messages from the current turn to SQLite — it skips
messages already present in conversation_history, assuming they are
already persisted.

That assumption fails for legacy JSONL-only sessions:

  Turn N (first after DB migration):
    load_transcript(id)       → SQLite: 0  → falls back to JSONL: 994 ✓
    _flush_messages_to_session_db: skip first 994, write 2 new → SQLite: 2

  Turn N+1:
    load_transcript(id)       → SQLite: 2  → returns immediately ✗
    Agent sees 2 messages of history instead of 996

The same pattern causes the reported symptom: session JSON truncated
to 4 messages (_save_session_log writes agent.messages which only has
2 history + 2 new = 4).

Fix: always load both sources and return whichever is longer.  For a
fully-migrated session SQLite will always be ≥ JSONL, so there is no
regression.  For a legacy session that hasn't been bootstrapped yet,
JSONL wins and the full history is restored.

Closes #3212

* test: add load_transcript source preference tests for #3212

Covers: JSONL longer returns JSONL, SQLite longer returns SQLite,
SQLite empty falls back to JSONL, both empty returns empty, equal
length prefers SQLite (richer reasoning fields).

---------

Co-authored-by: Mibayy <mibayy@hermes.ai>
Co-authored-by: kewe63 <kewe.3217@gmail.com>
Co-authored-by: Mibayy <mibayy@users.noreply.github.com>
											
										
										
											2026-03-26 13:47:14 -07:00
 								        # Load legacy JSONL transcript (may contain more history than SQLite
 								        # for sessions created before the DB layer was introduced).
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        transcript_path = self.get_transcript_path(session_id)
-												fix(state): SQLite concurrency hardening + session transcript integrity (#3249)

* fix(session-db): survive CLI/gateway concurrent write contention

Closes #3139

Three layered fixes for the scenario where CLI and gateway write to
state.db concurrently, causing create_session() to fail with
'database is locked' and permanently disabling session_search on the
gateway side.

1. Increase SQLite connection timeout: 10s -> 30s
   hermes_state.py: longer window for the WAL writer to finish a batch
   flush before the other process gives up entirely.

2. INSERT OR IGNORE in create_session
   hermes_state.py: prevents IntegrityError on duplicate session IDs
   (e.g. gateway restarts while CLI session is still alive).

3. Don't null out _session_db on create_session failure  (main fix)
   run_agent.py: a transient lock at agent startup must not permanently
   disable session_search for the lifetime of that agent instance.
   _session_db now stays alive so subsequent flushes and searches work
   once the lock clears.

4. New ensure_session() helper + call it during flush
   hermes_state.py: INSERT OR IGNORE for a minimal session row.
   run_agent.py _flush_messages_to_session_db: calls ensure_session()
   before appending messages, so the FK constraint is satisfied even
   when create_session() failed at startup. No-op when the row exists.

* fix(state): release lock between context queries in search_messages

The context-window queries (one per FTS5 match) were running inside
the same lock acquisition as the primary FTS5 query, holding the lock
for O(N) sequential SQLite round-trips. Move per-match context fetches
outside the outer lock block so each acquires the lock independently,
keeping critical sections short and allowing other threads to interleave.

* fix(session): prefer longer source in load_transcript to prevent legacy truncation

When a long-lived session pre-dates SQLite storage (e.g. sessions
created before the DB layer was introduced, or after a clean
deployment that reset the DB), _flush_messages_to_session_db only
writes the *new* messages from the current turn to SQLite — it skips
messages already present in conversation_history, assuming they are
already persisted.

That assumption fails for legacy JSONL-only sessions:

  Turn N (first after DB migration):
    load_transcript(id)       → SQLite: 0  → falls back to JSONL: 994 ✓
    _flush_messages_to_session_db: skip first 994, write 2 new → SQLite: 2

  Turn N+1:
    load_transcript(id)       → SQLite: 2  → returns immediately ✗
    Agent sees 2 messages of history instead of 996

The same pattern causes the reported symptom: session JSON truncated
to 4 messages (_save_session_log writes agent.messages which only has
2 history + 2 new = 4).

Fix: always load both sources and return whichever is longer.  For a
fully-migrated session SQLite will always be ≥ JSONL, so there is no
regression.  For a legacy session that hasn't been bootstrapped yet,
JSONL wins and the full history is restored.

Closes #3212

* test: add load_transcript source preference tests for #3212

Covers: JSONL longer returns JSONL, SQLite longer returns SQLite,
SQLite empty falls back to JSONL, both empty returns empty, equal
length prefers SQLite (richer reasoning fields).

---------

Co-authored-by: Mibayy <mibayy@hermes.ai>
Co-authored-by: kewe63 <kewe.3217@gmail.com>
Co-authored-by: Mibayy <mibayy@users.noreply.github.com>
											
										
										
											2026-03-26 13:47:14 -07:00
+								        jsonl_messages = []
 								        if transcript_path.exists():
 								            with open(transcript_path, "r", encoding="utf-8") as f:
 								                for line in f:
 								                    line = line.strip()
 								                    if line:
 								                        try:
 								                            jsonl_messages.append(json.loads(line))
 								                        except json.JSONDecodeError:
 								                            logger.warning(
 								                                "Skipping corrupt line in transcript %s: %s",
 								                                session_id, line[:120],
 								                            )
 								        # Prefer whichever source has more messages.
 								        #
 								        # Background: when a session pre-dates SQLite storage (or when the DB
 								        # layer was added while a long-lived session was already active), the
 								        # first post-migration turn writes only the *new* messages to SQLite
 								        # (because _flush_messages_to_session_db skips messages already in
 								        # conversation_history, assuming they're persisted).  On the *next*
 								        # turn load_transcript returns those few SQLite rows and ignores the
 								        # full JSONL history — the model sees a context of 1-4 messages instead
 								        # of hundreds.  Using the longer source prevents this silent truncation.
 								        if len(jsonl_messages) > len(db_messages):
 								            if db_messages:
 								                logger.debug(
 								                    "Session %s: JSONL has %d messages vs SQLite %d — "
 								                    "using JSONL (legacy session not yet fully migrated)",
 								                    session_id, len(jsonl_messages), len(db_messages),
 								                )
 								            return jsonl_messages
 								        return db_messages
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								def build_session_context(
 								    source: SessionSource,
 								    config: GatewayConfig,
 								    session_entry: Optional[SessionEntry] = None
 								) -> SessionContext:
 								    """
 								    Build a full session context from a source and config.
 								    This is used to inject context into the agent's system prompt.
 								    """
 								    connected = config.get_connected_platforms()
 								    home_channels = {}
 								    for platform in connected:
 								        home = config.get_home_channel(platform)
 								        if home:
 								            home_channels[platform] = home
 								    context = SessionContext(
 								        source=source,
 								        connected_platforms=connected,
 								        home_channels=home_channels,
-												fix(gateway): preserve sender attribution in shared group sessions

Generalize shared multi-user session handling so non-thread group sessions
(group_sessions_per_user=False) get the same treatment as shared threads:
inbound messages are prefixed with [sender name], and the session prompt
shows a multi-user note instead of pinning a single **User:** line into
the cached system prompt.

Before: build_session_key already treated these as shared sessions, but
_prepare_inbound_message_text and build_session_context_prompt only
recognized shared threads — creating cross-user attribution drift and
prompt-cache contamination in shared groups.

- Add is_shared_multi_user_session() helper alongside build_session_key()
  so both the session key and the multi-user branches are driven by the
  same rules (DMs never shared, threads shared unless
  thread_sessions_per_user, groups shared unless group_sessions_per_user).
- Add shared_multi_user_session field to SessionContext, populated by
  build_session_context() from config.
- Use context.shared_multi_user_session in the prompt builder (label is
  'Multi-user thread' when a thread is present, 'Multi-user session'
  otherwise).
- Use the helper in _prepare_inbound_message_text so non-thread shared
  groups also get [sender] prefixes.

Default behavior unchanged: DMs stay single-user, groups with
group_sessions_per_user=True still show the user normally, shared threads
keep their existing multi-user behavior.

Tests (65 passed):
- tests/gateway/test_session.py: new shared non-thread group prompt case.
- tests/gateway/test_shared_group_sender_prefix.py: inbound preprocessing
  for shared non-thread groups and default groups.

											
										
										
											2026-04-21 00:37:26 -07:00
+								        shared_multi_user_session=is_shared_multi_user_session(
 								            source,
 								            group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
 								            thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False),
 								        ),
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    )
 								    if session_entry:
 								        context.session_key = session_entry.session_key
 								        context.session_id = session_entry.session_id
 								        context.created_at = session_entry.created_at
 								        context.updated_at = session_entry.updated_at
 								    return context