2026-02-02 19:01:51 -08:00
|
|
|
|
"""
|
|
|
|
|
|
Base platform adapter interface.
|
|
|
|
|
|
|
|
|
|
|
|
All platform adapters (Telegram, Discord, WhatsApp) inherit from this
|
|
|
|
|
|
and implement the required methods.
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
2026-02-03 15:02:41 -08:00
|
|
|
|
import asyncio
|
2026-04-18 21:32:49 -06:00
|
|
|
|
import inspect
|
2026-04-10 16:40:54 -07:00
|
|
|
|
import ipaddress
|
2026-02-25 21:04:36 -08:00
|
|
|
|
import logging
|
2026-02-15 16:10:50 -08:00
|
|
|
|
import os
|
2026-03-26 17:37:10 -07:00
|
|
|
|
import random
|
2026-02-10 21:02:40 -08:00
|
|
|
|
import re
|
2026-04-10 16:40:54 -07:00
|
|
|
|
import socket as _socket
|
2026-04-09 19:37:58 +08:00
|
|
|
|
import subprocess
|
|
|
|
|
|
import sys
|
2026-02-15 16:10:50 -08:00
|
|
|
|
import uuid
|
2026-02-02 19:01:51 -08:00
|
|
|
|
from abc import ABC, abstractmethod
|
2026-04-06 23:27:54 +03:00
|
|
|
|
from urllib.parse import urlsplit
|
2026-02-25 21:04:36 -08:00
|
|
|
|
|
2026-04-21 17:55:04 +08:00
|
|
|
|
from utils import normalize_proxy_url
|
|
|
|
|
|
|
2026-02-25 21:04:36 -08:00
|
|
|
|
logger = logging.getLogger(__name__)
|
2026-04-09 19:37:58 +08:00
|
|
|
|
|
|
|
|
|
|
|
2026-04-12 19:06:20 -07:00
|
|
|
|
def utf16_len(s: str) -> int:
    """Return the number of UTF-16 code units needed to encode *s*.

    Telegram's message-length limit (4 096) is measured in UTF-16 code units,
    **not** Unicode code-points. Characters outside the Basic Multilingual
    Plane (emoji like 😀, CJK Extension B, musical symbols, …) are encoded as
    surrogate pairs and therefore consume **two** UTF-16 code units each, even
    though Python's ``len()`` counts them as one.

    Ported from nearai/ironclaw#2304 which discovered the same discrepancy in
    Rust's ``chars().count()``.
    """
    # Every UTF-16 code unit is exactly two bytes in the LE encoding,
    # so halving the byte count yields the code-unit count.
    encoded = s.encode("utf-16-le")
    return len(encoded) // 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _prefix_within_utf16_limit(s: str, limit: int) -> str:
    """Return the longest prefix of *s* whose UTF-16 length is ≤ *limit*.

    Unlike a plain ``s[:limit]``, this measures candidate prefixes with
    :func:`utf16_len`, so surrogate-pair boundaries are respected and a
    multi-code-unit character is never sliced in half.
    """
    if utf16_len(s) <= limit:
        return s
    # Binary search over codepoint cut positions. Invariant: every prefix
    # of length <= low fits the limit; every prefix longer than high does not.
    low, high = 0, len(s)
    while low < high:
        probe = (low + high + 1) // 2
        if utf16_len(s[:probe]) <= limit:
            low = probe
        else:
            high = probe - 1
    return s[:low]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _custom_unit_to_cp(s: str, budget: int, len_fn) -> int:
|
|
|
|
|
|
"""Return the largest codepoint offset *n* such that ``len_fn(s[:n]) <= budget``.
|
|
|
|
|
|
|
|
|
|
|
|
Used by :meth:`BasePlatformAdapter.truncate_message` when *len_fn* measures
|
|
|
|
|
|
length in units different from Python codepoints (e.g. UTF-16 code units).
|
|
|
|
|
|
Falls back to binary search which is O(log n) calls to *len_fn*.
|
|
|
|
|
|
"""
|
|
|
|
|
|
if len_fn(s) <= budget:
|
|
|
|
|
|
return len(s)
|
|
|
|
|
|
lo, hi = 0, len(s)
|
|
|
|
|
|
while lo < hi:
|
|
|
|
|
|
mid = (lo + hi + 1) // 2
|
|
|
|
|
|
if len_fn(s[:mid]) <= budget:
|
|
|
|
|
|
lo = mid
|
|
|
|
|
|
else:
|
|
|
|
|
|
hi = mid - 1
|
|
|
|
|
|
return lo
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-10 16:40:54 -07:00
|
|
|
|
def is_network_accessible(host: str) -> bool:
    """Return True if *host* would expose the server beyond loopback.

    Loopback addresses (127.0.0.1, ::1, IPv4-mapped ::ffff:127.0.0.1)
    are local-only. Unspecified addresses (0.0.0.0, ::) bind all
    interfaces and count as accessible. Hostnames are resolved via DNS;
    resolution failure fails closed (treated as accessible).
    """
    try:
        parsed = ipaddress.ip_address(host)
    except ValueError:
        parsed = None  # Not an IP literal — resolve as a hostname below.

    if parsed is not None:
        if parsed.is_loopback:
            return False
        # Python reports is_loopback=False for IPv4-mapped addresses such as
        # ::ffff:127.0.0.1, so inspect the embedded IPv4 address explicitly.
        mapped = getattr(parsed, "ipv4_mapped", None)
        if mapped is not None and mapped.is_loopback:
            return False
        return True

    try:
        infos = _socket.getaddrinfo(
            host, None, _socket.AF_UNSPEC, _socket.SOCK_STREAM,
        )
    except (_socket.gaierror, OSError):
        # DNS failure: assume the host could be reachable (fail closed).
        return True

    # The hostname is considered network-accessible iff at least one of the
    # addresses it resolves to is non-loopback.
    return any(
        not ipaddress.ip_address(info[4][0]).is_loopback
        for info in infos
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-09 19:37:58 +08:00
|
|
|
|
def _detect_macos_system_proxy() -> str | None:
|
|
|
|
|
|
"""Read the macOS system HTTP(S) proxy via ``scutil --proxy``.
|
|
|
|
|
|
|
|
|
|
|
|
Returns an ``http://host:port`` URL string if an HTTP or HTTPS proxy is
|
|
|
|
|
|
enabled, otherwise *None*. Falls back silently on non-macOS or on any
|
|
|
|
|
|
subprocess error.
|
|
|
|
|
|
"""
|
|
|
|
|
|
if sys.platform != "darwin":
|
|
|
|
|
|
return None
|
|
|
|
|
|
try:
|
|
|
|
|
|
out = subprocess.check_output(
|
|
|
|
|
|
["scutil", "--proxy"], timeout=3, text=True, stderr=subprocess.DEVNULL,
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
props: dict[str, str] = {}
|
|
|
|
|
|
for line in out.splitlines():
|
|
|
|
|
|
line = line.strip()
|
|
|
|
|
|
if " : " in line:
|
|
|
|
|
|
key, _, val = line.partition(" : ")
|
|
|
|
|
|
props[key.strip()] = val.strip()
|
|
|
|
|
|
|
|
|
|
|
|
# Prefer HTTPS, fall back to HTTP
|
|
|
|
|
|
for enable_key, host_key, port_key in (
|
|
|
|
|
|
("HTTPSEnable", "HTTPSProxy", "HTTPSPort"),
|
|
|
|
|
|
("HTTPEnable", "HTTPProxy", "HTTPPort"),
|
|
|
|
|
|
):
|
|
|
|
|
|
if props.get(enable_key) == "1":
|
|
|
|
|
|
host = props.get(host_key)
|
|
|
|
|
|
port = props.get(port_key)
|
|
|
|
|
|
if host and port:
|
|
|
|
|
|
return f"http://{host}:{port}"
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-24 15:09:47 -06:00
|
|
|
|
def _split_host_port(value: str) -> tuple[str, int | None]:
|
|
|
|
|
|
raw = str(value or "").strip()
|
|
|
|
|
|
if not raw:
|
|
|
|
|
|
return "", None
|
|
|
|
|
|
if "://" in raw:
|
|
|
|
|
|
parsed = urlsplit(raw)
|
|
|
|
|
|
return (parsed.hostname or "").lower().rstrip("."), parsed.port
|
|
|
|
|
|
if raw.startswith("[") and "]" in raw:
|
|
|
|
|
|
host, _, rest = raw[1:].partition("]")
|
|
|
|
|
|
port = None
|
|
|
|
|
|
if rest.startswith(":") and rest[1:].isdigit():
|
|
|
|
|
|
port = int(rest[1:])
|
|
|
|
|
|
return host.lower().rstrip("."), port
|
|
|
|
|
|
if raw.count(":") == 1:
|
|
|
|
|
|
host, _, maybe_port = raw.rpartition(":")
|
|
|
|
|
|
if maybe_port.isdigit():
|
|
|
|
|
|
return host.lower().rstrip("."), int(maybe_port)
|
|
|
|
|
|
return raw.lower().strip("[]").rstrip("."), None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _no_proxy_entries() -> list[str]:
|
|
|
|
|
|
entries: list[str] = []
|
|
|
|
|
|
for key in ("NO_PROXY", "no_proxy"):
|
|
|
|
|
|
raw = os.environ.get(key, "")
|
|
|
|
|
|
entries.extend(part.strip() for part in raw.split(",") if part.strip())
|
|
|
|
|
|
return entries
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _no_proxy_entry_matches(entry: str, host: str, port: int | None = None) -> bool:
    """Return True when a single NO_PROXY *entry* applies to (*host*, *port*).

    Supports ``*`` (match everything), CIDR networks, IP literals, wildcard
    suffixes (``*.example.com``), leading-dot suffixes (``.example.com``) and
    bare domains (exact match or subdomain). An entry carrying a port only
    matches when the target port is known and equal.
    """
    token = str(entry or "").strip().lower()
    if not token:
        return False
    if token == "*":
        return True

    token_host, token_port = _split_host_port(token)
    # A port-qualified entry requires the target port to be known and equal.
    if token_port is not None and (port is None or token_port != port):
        return False
    if not token_host:
        return False

    # CIDR network (also accepts bare IPs, which parse as /32 or /128).
    try:
        network = ipaddress.ip_network(token_host, strict=False)
    except ValueError:
        network = None
    if network is not None:
        try:
            return ipaddress.ip_address(host) in network
        except ValueError:
            return False  # Entry is an IP range but the host is a name.

    # Single IP literal: compare by address equality.
    try:
        token_ip = ipaddress.ip_address(token_host)
    except ValueError:
        token_ip = None
    if token_ip is not None:
        try:
            return ipaddress.ip_address(host) == token_ip
        except ValueError:
            return False

    # Domain-name forms.
    if token_host.startswith("*."):
        # "*.example.com" matches subdomains but not the apex itself.
        return host.endswith(token_host[1:])
    if token_host.startswith("."):
        # ".example.com" matches the apex and all subdomains.
        return host == token_host[1:] or host.endswith(token_host)
    # Bare domain: exact match or any subdomain.
    return host == token_host or host.endswith(f".{token_host}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def should_bypass_proxy(target_hosts: str | list[str] | tuple[str, ...] | set[str] | None) -> bool:
    """Return True when NO_PROXY/no_proxy matches at least one target host.

    Supports exact hosts, domain suffixes, wildcard suffixes, IP literals,
    CIDR ranges, optional host:port entries, and ``*``.
    """
    entries = _no_proxy_entries()
    if not entries or not target_hosts:
        return False

    # Normalize a single string into a one-element candidate list.
    if isinstance(target_hosts, str):
        candidates = [target_hosts]
    else:
        candidates = list(target_hosts)

    for item in candidates:
        host, port = _split_host_port(str(item))
        if not host:
            continue
        for entry in entries:
            if _no_proxy_entry_matches(entry, host, port):
                return True
    return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def resolve_proxy_url(
    platform_env_var: str | None = None,
    *,
    target_hosts: str | list[str] | tuple[str, ...] | set[str] | None = None,
) -> str | None:
    """Return a proxy URL from env vars, or macOS system proxy.

    Check order:
    0. *platform_env_var* (e.g. ``DISCORD_PROXY``) — highest priority
    1. HTTPS_PROXY / HTTP_PROXY / ALL_PROXY (and lowercase variants)
    2. macOS system proxy via ``scutil --proxy`` (auto-detect)

    Returns *None* if no proxy is found, or if NO_PROXY/no_proxy matches one
    of ``target_hosts``.
    """
    # Build the env-var precedence list: platform override first, then the
    # conventional proxy variables in upper- and lowercase spellings.
    env_names: list[str] = []
    if platform_env_var:
        env_names.append(platform_env_var)
    env_names += ["HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
                  "https_proxy", "http_proxy", "all_proxy"]

    for env_name in env_names:
        candidate = (os.environ.get(env_name) or "").strip()
        if not candidate:
            continue
        # A NO_PROXY match wins over any configured proxy.
        if should_bypass_proxy(target_hosts):
            return None
        return normalize_proxy_url(candidate)

    detected = normalize_proxy_url(_detect_macos_system_proxy())
    if detected and should_bypass_proxy(target_hosts):
        return None
    return detected
|
2026-04-09 14:16:39 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
|
|
|
|
|
|
"""Build kwargs for ``commands.Bot()`` / ``discord.Client()`` with proxy.
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
- SOCKS URL → ``{"connector": ProxyConnector(..., rdns=True)}``
|
|
|
|
|
|
- HTTP URL → ``{"proxy": url}``
|
|
|
|
|
|
- *None* → ``{}``
|
|
|
|
|
|
|
|
|
|
|
|
``rdns=True`` forces remote DNS resolution through the proxy — required
|
|
|
|
|
|
by many SOCKS implementations (Shadowrocket, Clash) and essential for
|
|
|
|
|
|
bypassing DNS pollution behind the GFW.
|
|
|
|
|
|
"""
|
|
|
|
|
|
if not proxy_url:
|
|
|
|
|
|
return {}
|
|
|
|
|
|
if proxy_url.lower().startswith("socks"):
|
|
|
|
|
|
try:
|
|
|
|
|
|
from aiohttp_socks import ProxyConnector
|
|
|
|
|
|
|
|
|
|
|
|
connector = ProxyConnector.from_url(proxy_url, rdns=True)
|
|
|
|
|
|
return {"connector": connector}
|
|
|
|
|
|
except ImportError:
|
|
|
|
|
|
logger.warning(
|
|
|
|
|
|
"aiohttp_socks not installed — SOCKS proxy %s ignored. "
|
|
|
|
|
|
"Run: pip install aiohttp-socks",
|
|
|
|
|
|
proxy_url,
|
|
|
|
|
|
)
|
|
|
|
|
|
return {}
|
|
|
|
|
|
return {"proxy": proxy_url}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
|
|
|
|
|
|
"""Build kwargs for standalone ``aiohttp.ClientSession`` with proxy.
|
|
|
|
|
|
|
|
|
|
|
|
Returns ``(session_kwargs, request_kwargs)`` where:
|
|
|
|
|
|
- SOCKS → ``({"connector": ProxyConnector(...)}, {})``
|
|
|
|
|
|
- HTTP → ``({}, {"proxy": url})``
|
|
|
|
|
|
- None → ``({}, {})``
|
|
|
|
|
|
|
|
|
|
|
|
Usage::
|
|
|
|
|
|
|
|
|
|
|
|
sess_kw, req_kw = proxy_kwargs_for_aiohttp(proxy_url)
|
|
|
|
|
|
async with aiohttp.ClientSession(**sess_kw) as session:
|
|
|
|
|
|
async with session.get(url, **req_kw) as resp:
|
|
|
|
|
|
...
|
|
|
|
|
|
"""
|
|
|
|
|
|
if not proxy_url:
|
|
|
|
|
|
return {}, {}
|
|
|
|
|
|
if proxy_url.lower().startswith("socks"):
|
|
|
|
|
|
try:
|
|
|
|
|
|
from aiohttp_socks import ProxyConnector
|
|
|
|
|
|
|
|
|
|
|
|
connector = ProxyConnector.from_url(proxy_url, rdns=True)
|
|
|
|
|
|
return {"connector": connector}, {}
|
|
|
|
|
|
except ImportError:
|
|
|
|
|
|
logger.warning(
|
|
|
|
|
|
"aiohttp_socks not installed — SOCKS proxy %s ignored. "
|
|
|
|
|
|
"Run: pip install aiohttp-socks",
|
|
|
|
|
|
proxy_url,
|
|
|
|
|
|
)
|
|
|
|
|
|
return {}, {}
|
|
|
|
|
|
return {}, {"proxy": proxy_url}
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
|
from dataclasses import dataclass, field
|
|
|
|
|
|
from datetime import datetime
|
2026-02-15 16:10:50 -08:00
|
|
|
|
from pathlib import Path
|
2026-02-10 21:02:40 -08:00
|
|
|
|
from typing import Dict, List, Optional, Any, Callable, Awaitable, Tuple
|
2026-02-02 19:01:51 -08:00
|
|
|
|
from enum import Enum
|
|
|
|
|
|
|
2026-02-21 04:17:27 -08:00
|
|
|
|
from pathlib import Path as _Path

# Prepend the directory two levels above this file to sys.path so absolute
# imports like ``gateway.config`` below resolve regardless of how this module
# is loaded. NOTE(review): presumably parents[2] is the repository root —
# confirm against the package layout.
sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
|
2026-02-02 19:01:51 -08:00
|
|
|
|
|
|
|
|
|
|
from gateway.config import Platform, PlatformConfig
|
2026-03-11 09:15:34 +01:00
|
|
|
|
from gateway.session import SessionSource, build_session_key
|
2026-03-28 15:22:19 -07:00
|
|
|
|
from hermes_constants import get_hermes_dir
|
2026-02-02 19:01:51 -08:00
|
|
|
|
|
|
|
|
|
|
|
2026-03-13 03:14:04 -07:00
|
|
|
|
# User-facing notice returned when a skill requests secret input over a
# messaging platform, where secure (non-logged, non-echoed) entry cannot be
# guaranteed; users are directed to the local CLI or the .env file instead.
GATEWAY_SECRET_CAPTURE_UNSUPPORTED_MESSAGE = (
    "Secure secret entry is not supported over messaging. "
    "Load this skill in the local CLI to be prompted, or add the key to ~/.hermes/.env manually."
)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-10 05:02:17 -07:00
|
|
|
|
def safe_url_for_log(url: str, max_len: int = 80) -> str:
    """Return a URL string safe for logs (no query/fragment/userinfo)."""
    if max_len <= 0:
        return ""
    if url is None:
        return ""
    raw = str(url)
    if not raw:
        return ""

    try:
        parsed = urlsplit(raw)
    except Exception:
        # Unparseable input: just truncate the raw text.
        return raw[:max_len]

    if parsed.scheme and parsed.netloc:
        # Strip potential embedded credentials (user:pass@host).
        authority = parsed.netloc.rsplit("@", 1)[-1]
        origin = f"{parsed.scheme}://{authority}"
        path = parsed.path or ""
        if path in ("", "/"):
            safe = origin
        else:
            # Keep only the final path component; elide the middle.
            tail = path.rsplit("/", 1)[-1]
            safe = f"{origin}/.../{tail}" if tail else f"{origin}/..."
    else:
        safe = raw

    if len(safe) <= max_len:
        return safe
    if max_len <= 3:
        return "." * max_len
    return f"{safe[:max_len - 3]}..."
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-10 05:02:17 -07:00
|
|
|
|
async def _ssrf_redirect_guard(response):
    """Re-validate each redirect target to prevent redirect-based SSRF.

    Without this, an attacker can host a public URL that 302-redirects to
    http://169.254.169.254/ and bypass the pre-flight is_safe_url() check.

    Must be async because httpx.AsyncClient awaits response event hooks.
    """
    if not (response.is_redirect and response.next_request):
        return
    target = str(response.next_request.url)
    from tools.url_safety import is_safe_url
    if not is_safe_url(target):
        raise ValueError(
            f"Blocked redirect to private/internal address: {safe_url_for_log(target)}"
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-02-15 16:10:50 -08:00
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# Image cache utilities
|
|
|
|
|
|
#
|
|
|
|
|
|
# When users send images on messaging platforms, we download them to a local
|
|
|
|
|
|
# cache directory so they can be analyzed by the vision tool (which accepts
|
|
|
|
|
|
# local file paths). This avoids issues with ephemeral platform URLs
|
|
|
|
|
|
# (e.g. Telegram file URLs expire after ~1 hour).
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
2026-03-28 15:22:19 -07:00
|
|
|
|
# Default location: {HERMES_HOME}/cache/images/ (legacy: image_cache/)
|
|
|
|
|
|
IMAGE_CACHE_DIR = get_hermes_dir("cache/images", "image_cache")
|
2026-02-15 16:10:50 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_image_cache_dir() -> Path:
    """Return the image cache directory, creating it if it doesn't exist."""
    # mkdir with exist_ok is idempotent, so this is safe on every call.
    cache_path = IMAGE_CACHE_DIR
    cache_path.mkdir(parents=True, exist_ok=True)
    return cache_path
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-10 03:52:46 -07:00
|
|
|
|
def _looks_like_image(data: bytes) -> bool:
|
|
|
|
|
|
"""Return True if *data* starts with a known image magic-byte sequence."""
|
|
|
|
|
|
if len(data) < 4:
|
|
|
|
|
|
return False
|
|
|
|
|
|
if data[:8] == b"\x89PNG\r\n\x1a\n":
|
|
|
|
|
|
return True
|
|
|
|
|
|
if data[:3] == b"\xff\xd8\xff":
|
|
|
|
|
|
return True
|
|
|
|
|
|
if data[:6] in (b"GIF87a", b"GIF89a"):
|
|
|
|
|
|
return True
|
|
|
|
|
|
if data[:2] == b"BM":
|
|
|
|
|
|
return True
|
|
|
|
|
|
if data[:4] == b"RIFF" and len(data) >= 12 and data[8:12] == b"WEBP":
|
|
|
|
|
|
return True
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-02-15 16:10:50 -08:00
|
|
|
|
def cache_image_from_bytes(data: bytes, ext: str = ".jpg") -> str:
    """Save raw image bytes to the cache and return the absolute file path.

    Args:
        data: Raw image bytes.
        ext: File extension including the dot (e.g. ".jpg", ".png").

    Returns:
        Absolute path to the cached image file as a string.

    Raises:
        ValueError: If *data* does not look like a valid image (e.g. an HTML
            error page returned by the upstream server).
    """
    # Reject payloads that fail the magic-byte sniff — typically an HTML
    # error page served with a 200 status.
    if not _looks_like_image(data):
        snippet = data[:80].decode("utf-8", errors="replace")
        raise ValueError(
            f"Refusing to cache non-image data as {ext} "
            f"(starts with: {snippet!r})"
        )

    # Random 12-hex-digit name avoids collisions between concurrent saves.
    target = get_image_cache_dir() / f"img_{uuid.uuid4().hex[:12]}{ext}"
    target.write_bytes(data)
    return str(target)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-03-26 19:33:18 -07:00
|
|
|
|
async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) -> str:
    """
    Download an image from a URL and save it to the local cache.

    Retries on transient failures (timeouts, HTTP 429+ responses) with a
    linear backoff (1.5 s, 3.0 s, …) so a single slow CDN response doesn't
    lose the media.

    Args:
        url: The HTTP/HTTPS URL to download from.
        ext: File extension including the dot (e.g. ".jpg", ".png").
        retries: Number of retry attempts on transient failures.

    Returns:
        Absolute path to the cached image file as a string.

    Raises:
        ValueError: If the URL targets a private/internal network (SSRF protection).
    """
    # Pre-flight SSRF check; redirects are re-checked by _ssrf_redirect_guard.
    from tools.url_safety import is_safe_url
    if not is_safe_url(url):
        raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")

    import httpx
    _log = logging.getLogger(__name__)

    # The response hook validates every redirect hop before it is followed.
    async with httpx.AsyncClient(
        timeout=30.0,
        follow_redirects=True,
        event_hooks={"response": [_ssrf_redirect_guard]},
    ) as client:
        for attempt in range(retries + 1):
            try:
                response = await client.get(
                    url,
                    headers={
                        "User-Agent": "Mozilla/5.0 (compatible; HermesAgent/1.0)",
                        "Accept": "image/*,*/*;q=0.8",
                    },
                )
                response.raise_for_status()
                return cache_image_from_bytes(response.content, ext)
            except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
                # Only statuses < 429 are treated as permanent; everything
                # from 429 upward (including all 5xx) is retried.
                # NOTE(review): this also retries 430-499 — confirm intended.
                if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                    raise
                if attempt < retries:
                    # Linear backoff: 1.5 s after the first failure, then 3.0 s.
                    wait = 1.5 * (attempt + 1)
                    _log.debug(
                        "Media cache retry %d/%d for %s (%.1fs): %s",
                        attempt + 1,
                        retries,
                        safe_url_for_log(url),
                        wait,
                        exc,
                    )
                    await asyncio.sleep(wait)
                    continue
                # Retries exhausted — surface the last transient error.
                raise
|
2026-02-15 16:10:50 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def cleanup_image_cache(max_age_hours: int = 24) -> int:
    """Delete cached images older than *max_age_hours*.

    Returns the number of files removed.
    """
    import time

    deadline = time.time() - max_age_hours * 3600
    deleted = 0
    for entry in get_image_cache_dir().iterdir():
        if not entry.is_file() or entry.stat().st_mtime >= deadline:
            continue
        try:
            entry.unlink()
        except OSError:
            # File vanished or is locked — best-effort cleanup, keep going.
            continue
        deleted += 1
    return deleted
|
|
|
|
|
|
|
|
|
|
|
|
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# Audio cache utilities
|
|
|
|
|
|
#
|
|
|
|
|
|
# Same pattern as image cache -- voice messages from platforms are downloaded
|
|
|
|
|
|
# here so the STT tool (OpenAI Whisper) can transcribe them from local files.
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
2026-03-28 15:22:19 -07:00
|
|
|
|
AUDIO_CACHE_DIR = get_hermes_dir("cache/audio", "audio_cache")
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_audio_cache_dir() -> Path:
    """Return the audio cache directory, creating it if it doesn't exist."""
    # mkdir with exist_ok is idempotent, so this is safe on every call.
    cache_path = AUDIO_CACHE_DIR
    cache_path.mkdir(parents=True, exist_ok=True)
    return cache_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def cache_audio_from_bytes(data: bytes, ext: str = ".ogg") -> str:
    """
    Persist raw audio bytes into the local audio cache.

    Args:
        data: The audio payload to store.
        ext: File extension including the leading dot (e.g. ".ogg", ".mp3").

    Returns:
        Absolute path of the newly written cache file, as a string.
    """
    target = get_audio_cache_dir() / f"audio_{uuid.uuid4().hex[:12]}{ext}"
    target.write_bytes(data)
    return str(target)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-03-29 07:28:38 +07:00
|
|
|
|
async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) -> str:
    """
    Download an audio file from a URL and save it to the local cache.

    Retries on transient failures (timeouts, 429, 5xx) with linearly
    increasing backoff (1.5s, 3.0s, ...) so a single slow CDN response
    doesn't lose the media.

    Args:
        url: The HTTP/HTTPS URL to download from.
        ext: File extension including the dot (e.g. ".ogg", ".mp3").
        retries: Number of retry attempts on transient failures.

    Returns:
        Absolute path to the cached audio file as a string.

    Raises:
        ValueError: If the URL targets a private/internal network (SSRF protection).
    """
    from tools.url_safety import is_safe_url

    if not is_safe_url(url):
        raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")

    import httpx

    async with httpx.AsyncClient(
        timeout=30.0,
        follow_redirects=True,
        # NOTE(review): the hook name suggests each redirect hop is re-checked
        # against the SSRF rules — confirm in ``_ssrf_redirect_guard``.
        event_hooks={"response": [_ssrf_redirect_guard]},
    ) as client:
        for attempt in range(retries + 1):
            try:
                response = await client.get(
                    url,
                    headers={
                        "User-Agent": "Mozilla/5.0 (compatible; HermesAgent/1.0)",
                        "Accept": "audio/*,*/*;q=0.8",
                    },
                )
                response.raise_for_status()
                return cache_audio_from_bytes(response.content, ext)
            except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
                # Client errors below 429 are not transient — propagate them.
                if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                    raise
                if attempt < retries:
                    wait = 1.5 * (attempt + 1)
                    # Consistency fix: reuse the module-level ``logger`` instead
                    # of building a fresh (shadowing) ``_log`` on every call.
                    logger.debug(
                        "Audio cache retry %d/%d for %s (%.1fs): %s",
                        attempt + 1,
                        retries,
                        safe_url_for_log(url),
                        wait,
                        exc,
                    )
                    await asyncio.sleep(wait)
                    continue
                raise
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
|
|
|
|
|
|
|
|
2026-04-17 10:50:23 +05:30
|
|
|
|
# ---------------------------------------------------------------------------
# Video cache utilities
#
# Same pattern as image/audio cache -- videos from platforms are downloaded
# here so the agent can reference them by local file path.
# ---------------------------------------------------------------------------

VIDEO_CACHE_DIR = get_hermes_dir("cache/videos", "video_cache")

# Accepted video extensions mapped to their MIME types.
SUPPORTED_VIDEO_TYPES = {
    ".mp4": "video/mp4",
    ".mov": "video/quicktime",
    ".webm": "video/webm",
    ".mkv": "video/x-matroska",
    ".avi": "video/x-msvideo",
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_video_cache_dir() -> Path:
    """Ensure the video cache directory exists on disk and return it."""
    VIDEO_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    return VIDEO_CACHE_DIR
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def cache_video_from_bytes(data: bytes, ext: str = ".mp4") -> str:
    """Persist raw video bytes into the video cache and return the absolute path."""
    destination = get_video_cache_dir() / f"video_{uuid.uuid4().hex[:12]}{ext}"
    destination.write_bytes(data)
    return str(destination)
|
|
|
|
|
|
|
|
|
|
|
|
|
feat(telegram): add document file processing for PDF, text, and Office files
Download, cache, and enrich document files sent via Telegram. Supports
.pdf, .md, .txt, .docx, .xlsx, .pptx with size validation, unsupported
type rejection, text content injection for .md/.txt, and hourly cache
cleanup.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 11:44:57 -05:00
|
|
|
|
# ---------------------------------------------------------------------------
# Document cache utilities
#
# Same pattern as image/audio cache -- documents from platforms are downloaded
# here so the agent can reference them by local file path.
# ---------------------------------------------------------------------------

DOCUMENT_CACHE_DIR = get_hermes_dir("cache/documents", "document_cache")

# Accepted document extensions mapped to their MIME types.
SUPPORTED_DOCUMENT_TYPES = {
    ".pdf": "application/pdf",
    ".md": "text/markdown",
    ".txt": "text/plain",
    ".log": "text/plain",
    ".zip": "application/zip",
    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_document_cache_dir() -> Path:
    """Ensure the document cache directory exists on disk and return it."""
    DOCUMENT_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    return DOCUMENT_CACHE_DIR
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def cache_document_from_bytes(data: bytes, filename: str) -> str:
    """
    Save raw document bytes to the cache and return the absolute file path.

    The cached filename preserves the original human-readable name with a
    unique prefix: ``doc_{uuid12}_{original_filename}``.

    Args:
        data: Raw document bytes.
        filename: Original filename (e.g. "report.pdf").

    Returns:
        Absolute path to the cached document file as a string.

    Raises:
        ValueError: If the sanitized path escapes the cache directory.
    """
    cache_dir = get_document_cache_dir()
    # Sanitize: strip directory components, null bytes, and control characters.
    safe_name = Path(filename).name if filename else "document"
    # Fix: the code previously removed only NUL even though the comment
    # promised control-character stripping; drop all C0 controls and DEL.
    safe_name = re.sub(r"[\x00-\x1f\x7f]", "", safe_name).strip()
    if not safe_name or safe_name in (".", ".."):
        safe_name = "document"
    cached_name = f"doc_{uuid.uuid4().hex[:12]}_{safe_name}"
    filepath = cache_dir / cached_name
    # Final safety check: ensure the resolved path stays inside the cache dir.
    if not filepath.resolve().is_relative_to(cache_dir.resolve()):
        raise ValueError(f"Path traversal rejected: {filename!r}")
    filepath.write_bytes(data)
    return str(filepath)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def cleanup_document_cache(max_age_hours: int = 24) -> int:
    """
    Delete cached documents older than *max_age_hours*.

    Returns the number of files removed.
    """
    import time

    threshold = time.time() - max_age_hours * 3600
    deleted = 0
    for entry in get_document_cache_dir().iterdir():
        if not entry.is_file() or entry.stat().st_mtime >= threshold:
            continue
        try:
            entry.unlink()
        except OSError:
            # Best-effort cleanup: skip files we cannot remove.
            continue
        deleted += 1
    return deleted
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
|
class MessageType(Enum):
    """Kinds of incoming platform messages, normalized across adapters."""

    TEXT = "text"
    LOCATION = "location"
    PHOTO = "photo"
    VIDEO = "video"
    AUDIO = "audio"
    VOICE = "voice"
    DOCUMENT = "document"
    STICKER = "sticker"
    COMMAND = "command"  # /command style
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-08 16:07:07 -07:00
|
|
|
|
class ProcessingOutcome(Enum):
    """Result classification for message-processing lifecycle hooks."""

    SUCCESS = "success"
    FAILURE = "failure"
    CANCELLED = "cancelled"
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
|
@dataclass
class MessageEvent:
    """
    Incoming message from a platform.

    Normalized representation that all adapters produce.
    """

    # Message content
    text: str
    message_type: MessageType = MessageType.TEXT

    # Source information (chat/user identity). Defaults to None — presumably
    # only for synthetic/internal events; confirm against adapter call sites.
    source: SessionSource = None

    # Original platform data
    raw_message: Any = None
    message_id: Optional[str] = None

    # Platform-specific update identifier. For Telegram this is the
    # ``update_id`` from the PTB Update wrapper; other platforms currently
    # ignore it. Used by ``/restart`` to record the triggering update so the
    # new gateway can advance the Telegram offset past it and avoid processing
    # the same ``/restart`` twice if PTB's graceful-shutdown ACK times out
    # ("Error while calling `get_updates` one more time to mark all fetched
    # updates" in gateway.log).
    platform_update_id: Optional[int] = None

    # Media attachments
    # media_urls: local file paths (for vision tool access)
    media_urls: List[str] = field(default_factory=list)
    media_types: List[str] = field(default_factory=list)

    # Reply context
    reply_to_message_id: Optional[str] = None
    reply_to_text: Optional[str] = None  # Text of the replied-to message (for context injection)

    # Auto-loaded skill(s) for topic/channel bindings (e.g., Telegram DM Topics,
    # Discord channel_skill_bindings). A single name or ordered list.
    auto_skill: Optional[str | list[str]] = None

    # Per-channel ephemeral system prompt (e.g. Discord channel_prompts).
    # Applied at API call time and never persisted to transcript history.
    channel_prompt: Optional[str] = None

    # Internal flag — set for synthetic events (e.g. background process
    # completion notifications) that must bypass user authorization checks.
    internal: bool = False

    # Timestamps
    timestamp: datetime = field(default_factory=datetime.now)

    def is_command(self) -> bool:
        """Check if this is a command message (e.g., /new, /reset)."""
        return self.text.startswith("/")

    def get_command(self) -> Optional[str]:
        """Extract command name if this is a command message.

        Returns the lowercased command name without the leading ``/``,
        with any ``@suffix`` (bot-addressed form) stripped. Returns None
        for non-commands and for path-like text such as ``/usr/bin``.
        """
        if not self.is_command():
            return None
        # Split on space and get first word, strip the /
        parts = self.text.split(maxsplit=1)
        raw = parts[0][1:].lower() if parts else None
        if raw and "@" in raw:
            # Keep only the part before "@" (e.g. "/cmd@botname" → "cmd").
            raw = raw.split("@", 1)[0]
        # Reject file paths: valid command names never contain /
        if raw and "/" in raw:
            return None
        return raw

    def get_command_args(self) -> str:
        """Get the arguments after a command.

        Non-command messages are returned unchanged. Smart-punctuation
        dashes are normalized back to ASCII before returning.
        """
        if not self.is_command():
            return self.text
        parts = self.text.split(maxsplit=1)
        args = parts[1] if len(parts) > 1 else ""
        # iOS auto-corrects -- to — (em dash) and - to – (en dash)
        args = args.replace("\u2014\u2014", "--").replace("\u2014", "--").replace("\u2013", "-")
        return args
|
2026-02-02 19:01:51 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class SendResult:
    """Result of sending a message."""

    # Whether the platform accepted the message.
    success: bool
    # Platform-assigned ID of the sent message, when available.
    message_id: Optional[str] = None
    # Error description when success is False.
    error: Optional[str] = None
    # Raw platform response object, for platform-specific callers.
    raw_response: Any = None
    retryable: bool = False  # True for transient connection errors — base will retry automatically
|
2026-03-26 17:37:10 -07:00
|
|
|
|
|
|
|
|
|
|
|
2026-04-10 14:00:21 -07:00
|
|
|
|
def merge_pending_message_event(
    pending_messages: Dict[str, MessageEvent],
    session_key: str,
    event: MessageEvent,
    *,
    merge_text: bool = False,
) -> None:
    """Store or merge a pending event for a session.

    Photo bursts/albums often arrive as multiple near-simultaneous PHOTO
    events. Merge those into the existing queued event so the next turn sees
    the whole burst.

    When ``merge_text`` is enabled, rapid follow-up TEXT events are appended
    instead of replacing the pending turn. This is used for Telegram bursty
    follow-ups so a multi-part user thought is not silently truncated to only
    the last queued fragment.
    """
    queued = pending_messages.get(session_key)
    if not queued:
        # Nothing pending yet — queue the event as-is.
        pending_messages[session_key] = event
        return

    queued_is_photo = getattr(queued, "message_type", None) == MessageType.PHOTO
    event_is_photo = event.message_type == MessageType.PHOTO

    # Case 1: photo burst/album — accumulate media, merge captions.
    if queued_is_photo and event_is_photo:
        queued.media_urls.extend(event.media_urls)
        queued.media_types.extend(event.media_types)
        if event.text:
            queued.text = BasePlatformAdapter._merge_caption(queued.text, event.text)
        return

    # Case 2: at least one side carries media — combine attachments and text.
    if queued.media_urls or event.media_urls:
        if event.media_urls:
            queued.media_urls.extend(event.media_urls)
            queued.media_types.extend(event.media_types)
        if event.text:
            queued.text = (
                BasePlatformAdapter._merge_caption(queued.text, event.text)
                if queued.text
                else event.text
            )
        if queued_is_photo or event_is_photo:
            # Preserve the PHOTO classification so the media is processed.
            queued.message_type = MessageType.PHOTO
        return

    # Case 3: opt-in text coalescing for rapid TEXT follow-ups.
    if (
        merge_text
        and getattr(queued, "message_type", None) == MessageType.TEXT
        and event.message_type == MessageType.TEXT
    ):
        if event.text:
            queued.text = f"{queued.text}\n{event.text}" if queued.text else event.text
        return

    # Default: latest event replaces whatever was pending.
    pending_messages[session_key] = event
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-04 19:05:34 -07:00
|
|
|
|
# Error substrings that indicate a transient *connection* failure worth retrying.
# "timeout" / "timed out" / "readtimeout" / "writetimeout" are intentionally
# excluded: a read/write timeout on a non-idempotent call (e.g. send_message)
# means the request may have reached the server — retrying risks duplicate
# delivery. "connecttimeout" is safe because the connection was never
# established. Platforms that know a timeout is safe to retry should set
# SendResult.retryable = True explicitly.
# NOTE(review): entries appear to be lowercase substrings matched against
# error text by the retry path elsewhere in this module — confirm the
# matcher lowercases before comparing.
_RETRYABLE_ERROR_PATTERNS = (
    "connecterror",
    "connectionerror",
    "connectionreset",
    "connectionrefused",
    "connecttimeout",
    "network",
    "broken pipe",
    "remotedisconnected",
    "eoferror",
)
|
2026-02-02 19:01:51 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Type for message handlers: an async callable that consumes a normalized
# MessageEvent and optionally returns a response string.
MessageHandler = Callable[[MessageEvent], Awaitable[Optional[str]]]
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-15 16:26:26 -07:00
|
|
|
|
def resolve_channel_prompt(
|
|
|
|
|
|
config_extra: dict,
|
|
|
|
|
|
channel_id: str,
|
|
|
|
|
|
parent_id: str | None = None,
|
|
|
|
|
|
) -> str | None:
|
|
|
|
|
|
"""Resolve a per-channel ephemeral prompt from platform config.
|
|
|
|
|
|
|
|
|
|
|
|
Looks up ``channel_prompts`` in the adapter's ``config.extra`` dict.
|
|
|
|
|
|
Prefers an exact match on *channel_id*; falls back to *parent_id*
|
|
|
|
|
|
(useful for forum threads / child channels inheriting a parent prompt).
|
|
|
|
|
|
|
|
|
|
|
|
Returns the prompt string, or None if no match is found. Blank/whitespace-
|
|
|
|
|
|
only prompts are treated as absent.
|
|
|
|
|
|
"""
|
|
|
|
|
|
prompts = config_extra.get("channel_prompts") or {}
|
|
|
|
|
|
if not isinstance(prompts, dict):
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
for key in (channel_id, parent_id):
|
|
|
|
|
|
if not key:
|
|
|
|
|
|
continue
|
|
|
|
|
|
prompt = prompts.get(key)
|
|
|
|
|
|
if prompt is None:
|
|
|
|
|
|
continue
|
|
|
|
|
|
prompt = str(prompt).strip()
|
|
|
|
|
|
if prompt:
|
|
|
|
|
|
return prompt
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
|
class BasePlatformAdapter(ABC):
|
|
|
|
|
|
"""
|
|
|
|
|
|
Base class for platform adapters.
|
|
|
|
|
|
|
|
|
|
|
|
Subclasses implement platform-specific logic for:
|
|
|
|
|
|
- Connecting and authenticating
|
|
|
|
|
|
- Receiving messages
|
|
|
|
|
|
- Sending messages/responses
|
|
|
|
|
|
- Handling media
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
    def __init__(self, config: PlatformConfig, platform: Platform):
        """Initialize shared adapter state.

        Args:
            config: Platform-specific configuration for this adapter.
            platform: Enum value identifying which platform this adapter serves.
        """
        self.config = config
        self.platform = platform
        # Callback invoked for each normalized incoming MessageEvent.
        self._message_handler: Optional[MessageHandler] = None
        self._running = False
        # Fatal-error state; see _set_fatal_error / has_fatal_error.
        self._fatal_error_code: Optional[str] = None
        self._fatal_error_message: Optional[str] = None
        self._fatal_error_retryable = True
        self._fatal_error_handler: Optional[Callable[["BasePlatformAdapter"], Awaitable[None] | None]] = None
        # Track active message handlers per session for interrupt support.
        # _active_sessions stores the per-session interrupt Event; _session_tasks
        # maps session → the specific Task currently processing it so that
        # session-terminating commands (/stop, /new, /reset) can cancel the
        # right task and release the adapter-level guard deterministically.
        # Without the owner-task map, an old task's finally block could delete
        # a newer task's guard, leaving stale busy state.
        self._active_sessions: Dict[str, asyncio.Event] = {}
        self._pending_messages: Dict[str, MessageEvent] = {}
        self._session_tasks: Dict[str, asyncio.Task] = {}
        # Background message-processing tasks spawned by handle_message().
        # Gateway shutdown cancels these so an old gateway instance doesn't keep
        # working on a task after --replace or manual restarts.
        self._background_tasks: set[asyncio.Task] = set()
        # One-shot callbacks to fire after the main response is delivered.
        # Keyed by session_key. Values are either a bare callback (legacy) or
        # a ``(generation, callback)`` tuple so GatewayRunner can make deferred
        # deliveries generation-aware and avoid stale runs clearing callbacks
        # registered by a fresher run for the same session.
        self._post_delivery_callbacks: Dict[str, Any] = {}
        self._expected_cancelled_tasks: set[asyncio.Task] = set()
        self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
        # Chats where auto-TTS on voice input is disabled (set by /voice off)
        self._auto_tts_disabled_chats: set = set()
        # Chats where typing indicator is paused (e.g. during approval waits).
        # _keep_typing skips send_typing when the chat_id is in this set.
        self._typing_paused: set = set()
|
2026-03-14 12:11:23 -07:00
|
|
|
|
|
|
|
|
|
|
    @property
    def has_fatal_error(self) -> bool:
        """True once a fatal error has been recorded via _set_fatal_error."""
        return self._fatal_error_message is not None

    @property
    def fatal_error_message(self) -> Optional[str]:
        """Human-readable description of the recorded fatal error, if any."""
        return self._fatal_error_message

    @property
    def fatal_error_code(self) -> Optional[str]:
        """Machine-readable code of the recorded fatal error, if any."""
        return self._fatal_error_code

    @property
    def fatal_error_retryable(self) -> bool:
        """Whether the recorded fatal error may be retried (default True)."""
        return self._fatal_error_retryable

    def set_fatal_error_handler(self, handler: Callable[["BasePlatformAdapter"], Awaitable[None] | None]) -> None:
        """Register a sync-or-async callback invoked when a fatal error occurs."""
        self._fatal_error_handler = handler
|
|
|
|
|
|
|
|
|
|
|
|
    def _mark_connected(self) -> None:
        """Record a successful connection and clear any fatal-error state."""
        self._running = True
        self._fatal_error_code = None
        self._fatal_error_message = None
        self._fatal_error_retryable = True
        try:
            from gateway.status import write_runtime_status
            write_runtime_status(platform=self.platform.value, platform_state="connected", error_code=None, error_message=None)
        except Exception:
            # Runtime-status reporting is best-effort; never fail connect on it.
            pass
|
|
|
|
|
|
|
|
|
|
|
|
    def _mark_disconnected(self) -> None:
        """Record a disconnect, preserving any already-recorded fatal error."""
        self._running = False
        if self.has_fatal_error:
            # A fatal error already published its own runtime status; keep it.
            return
        try:
            from gateway.status import write_runtime_status
            write_runtime_status(platform=self.platform.value, platform_state="disconnected", error_code=None, error_message=None)
        except Exception:
            # Runtime-status reporting is best-effort only.
            pass
|
|
|
|
|
|
|
|
|
|
|
|
    def _set_fatal_error(self, code: str, message: str, *, retryable: bool) -> None:
        """Record a fatal error and publish it to the runtime status file.

        Args:
            code: Machine-readable error identifier (see fatal_error_code).
            message: Human-readable description (see fatal_error_message).
            retryable: Whether a supervisor may attempt to reconnect.
        """
        self._running = False
        self._fatal_error_code = code
        self._fatal_error_message = message
        self._fatal_error_retryable = retryable
        try:
            from gateway.status import write_runtime_status
            write_runtime_status(
                platform=self.platform.value,
                platform_state="fatal",
                error_code=code,
                error_message=message,
            )
        except Exception:
            # Runtime-status reporting is best-effort only.
            pass
|
|
|
|
|
|
|
|
|
|
|
|
async def _notify_fatal_error(self) -> None:
|
|
|
|
|
|
handler = self._fatal_error_handler
|
|
|
|
|
|
if not handler:
|
|
|
|
|
|
return
|
|
|
|
|
|
result = handler(self)
|
|
|
|
|
|
if asyncio.iscoroutine(result):
|
|
|
|
|
|
await result
|
refactor: extract shared helpers to deduplicate repeated code patterns (#7917)
* refactor: add shared helper modules for code deduplication
New modules:
- gateway/platforms/helpers.py: MessageDeduplicator, TextBatchAggregator,
strip_markdown, ThreadParticipationTracker, redact_phone
- hermes_cli/cli_output.py: print_info/success/warning/error, prompt helpers
- tools/path_security.py: validate_within_dir, has_traversal_component
- utils.py additions: safe_json_loads, read_json_file, read_jsonl,
append_jsonl, env_str/lower/int/bool helpers
- hermes_constants.py additions: get_config_path, get_skills_dir,
get_logs_dir, get_env_path
* refactor: migrate gateway adapters to shared helpers
- MessageDeduplicator: discord, slack, dingtalk, wecom, weixin, mattermost
- strip_markdown: bluebubbles, feishu, sms
- redact_phone: sms, signal
- ThreadParticipationTracker: discord, matrix
- _acquire/_release_platform_lock: telegram, discord, slack, whatsapp,
signal, weixin
Net -316 lines across 19 files.
* refactor: migrate CLI modules to shared helpers
- tools_config.py: use cli_output print/prompt + curses_radiolist (-117 lines)
- setup.py: use cli_output print helpers + curses_radiolist (-101 lines)
- mcp_config.py: use cli_output prompt (-15 lines)
- memory_setup.py: use curses_radiolist (-86 lines)
Net -263 lines across 5 files.
* refactor: migrate to shared utility helpers
- safe_json_loads: agent/display.py (4 sites)
- get_config_path: skill_utils.py, hermes_logging.py, hermes_time.py
- get_skills_dir: skill_utils.py, prompt_builder.py
- Token estimation dedup: skills_tool.py imports from model_metadata
- Path security: skills_tool, cronjob_tools, skill_manager_tool, credential_files
- Non-atomic YAML writes: doctor.py, config.py now use atomic_yaml_write
- Platform dict: new platforms.py, skills_config + tools_config derive from it
- Anthropic key: new get_anthropic_key() in auth.py, used by doctor/status/config/main
* test: update tests for shared helper migrations
- test_dingtalk: use _dedup.is_duplicate() instead of _is_duplicate()
- test_mattermost: use _dedup instead of _seen_posts/_prune_seen
- test_signal: import redact_phone from helpers instead of signal
- test_discord_connect: _platform_lock_identity instead of _token_lock_identity
- test_telegram_conflict: updated lock error message format
- test_skill_manager_tool: 'escapes' instead of 'boundary' in error msgs
2026-04-11 13:59:52 -07:00
|
|
|
|
|
|
|
|
|
|
    def _acquire_platform_lock(self, scope: str, identity: str, resource_desc: str) -> bool:
        """Acquire a scoped lock for this adapter. Returns True on success.

        On contention, records a non-retryable fatal error (code
        ``"<scope>_lock"``) naming the owning PID when known, and returns
        False so the caller can abort connecting.
        """
        from gateway.status import acquire_scoped_lock
        # Remember scope/identity so _release_platform_lock can undo this.
        self._platform_lock_scope = scope
        self._platform_lock_identity = identity
        acquired, existing = acquire_scoped_lock(
            scope, identity, metadata={'platform': self.platform.value}
        )
        if acquired:
            return True
        # Lock held by another gateway process — surface a fatal error.
        owner_pid = existing.get('pid') if isinstance(existing, dict) else None
        message = (
            f'{resource_desc} already in use'
            + (f' (PID {owner_pid})' if owner_pid else '')
            + '. Stop the other gateway first.'
        )
        logger.error('[%s] %s', self.name, message)
        self._set_fatal_error(f'{scope}_lock', message, retryable=False)
        return False
|
|
|
|
|
|
|
|
|
|
|
|
def _release_platform_lock(self) -> None:
|
|
|
|
|
|
"""Release the scoped lock acquired by _acquire_platform_lock."""
|
|
|
|
|
|
identity = getattr(self, '_platform_lock_identity', None)
|
|
|
|
|
|
if not identity:
|
|
|
|
|
|
return
|
|
|
|
|
|
from gateway.status import release_scoped_lock
|
|
|
|
|
|
release_scoped_lock(self._platform_lock_scope, identity)
|
|
|
|
|
|
self._platform_lock_identity = None
|
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
|
    @property
    def name(self) -> str:
        """Human-readable name for this adapter (title-cased platform value)."""
        return self.platform.value.title()
|
|
|
|
|
|
|
|
|
|
|
|
    @property
    def is_connected(self) -> bool:
        """Check if adapter is currently connected.

        Reflects the _running flag set by _mark_connected/_mark_disconnected.
        """
        return self._running
|
|
|
|
|
|
|
|
|
|
|
|
    def set_message_handler(self, handler: MessageHandler) -> None:
        """
        Set the handler for incoming messages.

        The handler receives a MessageEvent and should return
        an optional response string.
        """
        self._message_handler = handler
|
2026-04-10 10:19:17 -07:00
|
|
|
|
|
|
|
|
|
|
    def set_busy_session_handler(self, handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]]) -> None:
        """Set an optional handler for messages arriving during active sessions.

        The handler receives the incoming event and the session key; pass
        None to clear. Return-value semantics are defined by the caller —
        presumably True means "handled"; confirm against the gateway runner.
        """
        self._busy_session_handler = handler
|
2026-02-02 19:01:51 -08:00
|
|
|
|
|
2026-04-06 11:46:17 -04:00
|
|
|
|
    def set_session_store(self, session_store: Any) -> None:
        """
        Set the session store for checking active sessions.

        Used by adapters that need to check if a thread/conversation
        has an active session before processing messages (e.g., Slack
        thread replies without explicit mentions).
        """
        self._session_store = session_store
|
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
|
    @abstractmethod
    async def connect(self) -> bool:
        """
        Connect to the platform and start receiving messages.

        Returns True if connection was successful.
        """
        pass
|
|
|
|
|
|
|
|
|
|
|
|
    @abstractmethod
    async def disconnect(self) -> None:
        """Disconnect from the platform."""
        pass
|
|
|
|
|
|
|
|
|
|
|
|
    @abstractmethod
    async def send(
        self,
        chat_id: str,
        content: str,
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None
    ) -> SendResult:
        """
        Send a message to a chat.

        Args:
            chat_id: The chat/channel ID to send to
            content: Message content (may be markdown)
            reply_to: Optional message ID to reply to
            metadata: Additional platform-specific options

        Returns:
            SendResult with success status and message ID
        """
        pass
|
2026-03-02 14:13:35 -03:00
|
|
|
|
|
2026-04-17 19:13:09 -07:00
|
|
|
|
    # Default: the adapter treats ``finalize=True`` on edit_message as a
    # no-op and is happy to have the stream consumer skip redundant final
    # edits. Subclasses that *require* an explicit finalize call to close
    # out the message lifecycle (e.g. rich card / AI assistant surfaces
    # such as DingTalk AI Cards) override this to True (class attribute or
    # property) so the stream consumer knows not to short-circuit.
    REQUIRES_EDIT_FINALIZE: bool = False
|
|
|
|
|
|
|
2026-03-02 14:13:35 -03:00
|
|
|
|
    async def edit_message(
        self,
        chat_id: str,
        message_id: str,
        content: str,
        *,
        finalize: bool = False,
    ) -> SendResult:
        """
        Edit a previously sent message. Optional — platforms that don't
        support editing return success=False and callers fall back to
        sending a new message.

        ``finalize`` signals that this is the last edit in a streaming
        sequence. Most platforms (Telegram, Slack, Discord, Matrix,
        etc.) treat it as a no-op because their edit APIs have no notion
        of message lifecycle state — an edit is an edit. Platforms that
        render streaming updates with a distinct "in progress" state and
        require explicit closure (e.g. rich card / AI assistant surfaces
        such as DingTalk AI Cards) use it to finalize the message and
        transition the UI out of the streaming indicator — those should
        also set ``REQUIRES_EDIT_FINALIZE = True`` so callers route a
        final edit through even when content is unchanged. Callers
        should set ``finalize=True`` on the final edit of a streamed
        response (typically when ``got_done`` fires in the stream
        consumer) and leave it ``False`` on intermediate edits.
        """
        # Base implementation: editing is unsupported.
        return SendResult(success=False, error="Not supported")
|
|
|
|
|
|
|
2026-03-10 15:08:40 -07:00
|
|
|
|
    async def send_typing(self, chat_id: str, metadata=None) -> None:
        """
        Send a typing indicator.

        Override in subclasses if the platform supports it.
        metadata: optional dict with platform-specific context (e.g. thread_id for Slack).
        """
        pass
|
2026-03-22 04:47:53 -07:00
|
|
|
|
|
|
|
|
|
|
    async def stop_typing(self, chat_id: str) -> None:
        """Stop a persistent typing indicator (if the platform uses one).

        Override in subclasses that start background typing loops.
        Default is a no-op for platforms with one-shot typing indicators.
        """
        pass
|
2026-02-02 19:01:51 -08:00
|
|
|
|
|
2026-02-10 21:02:40 -08:00
|
|
|
|
async def send_image(
|
|
|
|
|
|
self,
|
|
|
|
|
|
chat_id: str,
|
|
|
|
|
|
image_url: str,
|
|
|
|
|
|
caption: Optional[str] = None,
|
|
|
|
|
|
reply_to: Optional[str] = None,
|
2026-03-17 02:02:23 -07:00
|
|
|
|
metadata: Optional[Dict[str, Any]] = None,
|
2026-02-10 21:02:40 -08:00
|
|
|
|
) -> SendResult:
|
|
|
|
|
|
"""
|
|
|
|
|
|
Send an image natively via the platform API.
|
|
|
|
|
|
|
|
|
|
|
|
Override in subclasses to send images as proper attachments
|
|
|
|
|
|
instead of plain-text URLs. Default falls back to sending the
|
|
|
|
|
|
URL as a text message.
|
|
|
|
|
|
"""
|
|
|
|
|
|
# Fallback: send URL as text (subclasses override for native images)
|
|
|
|
|
|
text = f"{caption}\n{image_url}" if caption else image_url
|
|
|
|
|
|
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
|
|
|
|
|
|
|
2026-02-28 11:25:44 -08:00
|
|
|
|
async def send_animation(
|
|
|
|
|
|
self,
|
|
|
|
|
|
chat_id: str,
|
|
|
|
|
|
animation_url: str,
|
|
|
|
|
|
caption: Optional[str] = None,
|
|
|
|
|
|
reply_to: Optional[str] = None,
|
fix: send_animation metadata, MarkdownV2 inline code splitting, tirith cosign-free install (#1626)
* fix: Anthropic OAuth compatibility — Claude Code identity fingerprinting
Anthropic routes OAuth/subscription requests based on Claude Code's
identity markers. Without them, requests get intermittent 500 errors
(~25% failure rate observed). This matches what pi-ai (clawdbot) and
OpenCode both implement for OAuth compatibility.
Changes (OAuth tokens only — API key users unaffected):
1. Headers: user-agent 'claude-cli/2.1.2 (external, cli)' + x-app 'cli'
2. System prompt: prepend 'You are Claude Code, Anthropic's official CLI'
3. System prompt sanitization: replace Hermes/Nous references
4. Tool names: prefix with 'mcp_' (Claude Code convention for non-native tools)
5. Tool name stripping: remove 'mcp_' prefix from response tool calls
Before: 9/12 OK, 1 hard fail, 4 needed retries (~25% error rate)
After: 16/16 OK, 0 failures, 0 retries (0% error rate)
* fix: three gateway issues from user error logs
1. send_animation missing metadata kwarg (base.py)
- Base class send_animation lacked the metadata parameter that the
call site in base.py line 917 passes. Telegram's override accepted
it, but any platform without an override (Discord, Slack, etc.)
hit TypeError. Added metadata to base class signature.
2. MarkdownV2 split-inside-inline-code (base.py truncate_message)
- truncate_message could split at a space inside an inline code span
(e.g. `function(arg1, arg2)`), leaving an unpaired backtick and
unescaped parentheses in the chunk. Telegram rejects with
'character ( is reserved'. Added inline code awareness to the
split-point finder — detects odd backtick counts and moves the
split before the code span.
3. tirith auto-install without cosign (tirith_security.py)
- Previously required cosign on PATH for auto-install, blocking
install entirely with a warning if missing. Now proceeds with
SHA-256 checksum verification only when cosign is unavailable.
Cosign is still used for full supply chain verification when
present. If cosign IS present but verification explicitly fails,
install is still aborted (tampered release).
2026-03-16 23:39:41 -07:00
|
|
|
|
metadata: Optional[Dict[str, Any]] = None,
|
2026-02-28 11:25:44 -08:00
|
|
|
|
) -> SendResult:
|
|
|
|
|
|
"""
|
|
|
|
|
|
Send an animated GIF natively via the platform API.
|
|
|
|
|
|
|
|
|
|
|
|
Override in subclasses to send GIFs as proper animations
|
|
|
|
|
|
(e.g., Telegram send_animation) so they auto-play inline.
|
|
|
|
|
|
Default falls back to send_image.
|
|
|
|
|
|
"""
|
2026-03-17 02:02:23 -07:00
|
|
|
|
return await self.send_image(chat_id=chat_id, image_url=animation_url, caption=caption, reply_to=reply_to, metadata=metadata)
|
2026-02-28 11:25:44 -08:00
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
|
def _is_animation_url(url: str) -> bool:
|
|
|
|
|
|
"""Check if a URL points to an animated GIF (vs a static image)."""
|
|
|
|
|
|
lower = url.lower().split('?')[0] # Strip query params
|
|
|
|
|
|
return lower.endswith('.gif')
|
|
|
|
|
|
|
2026-02-10 21:02:40 -08:00
|
|
|
|
    @staticmethod
    def extract_images(content: str) -> Tuple[List[Tuple[str, str]], str]:
        """
        Extract image URLs from markdown and HTML image tags in a response.

        Finds patterns like:
        - markdown images (``![alt](https://example.com/image.png)``)
        - <img src="https://example.com/image.png">
        - <img src="https://example.com/image.png"></img>

        Args:
            content: The response text to scan.

        Returns:
            Tuple of (list of (url, alt_text) pairs, cleaned content with image tags removed).
        """
        images = []
        cleaned = content

        # Match markdown images: ![alt](https://...)
        md_pattern = r'!\[([^\]]*)\]\((https?://[^\s\)]+)\)'
        for match in re.finditer(md_pattern, content):
            alt_text = match.group(1)
            url = match.group(2)
            # Only extract URLs that look like actual images: either a known
            # image extension anywhere in the URL, or a known image-CDN host.
            if any(url.lower().endswith(ext) or ext in url.lower() for ext in
                   ['.png', '.jpg', '.jpeg', '.gif', '.webp', 'fal.media', 'fal-cdn', 'replicate.delivery']):
                images.append((url, alt_text))

        # Match HTML img tags: <img src="url"> or <img src="url"></img> or <img src="url"/>
        # (HTML tags are extracted unconditionally — no extension filter.)
        html_pattern = r'<img\s+src=["\']?(https?://[^\s"\'<>]+)["\']?\s*/?>\s*(?:</img>)?'
        for match in re.finditer(html_pattern, content):
            url = match.group(1)
            images.append((url, ""))

        # Remove only the matched image tags from content (not all markdown images)
        if images:
            extracted_urls = {url for url, _ in images}

            def _remove_if_extracted(match):
                # md matches carry two groups (alt, url); html matches one (url).
                url = match.group(2) if match.lastindex >= 2 else match.group(1)
                return '' if url in extracted_urls else match.group(0)

            cleaned = re.sub(md_pattern, _remove_if_extracted, cleaned)
            cleaned = re.sub(html_pattern, _remove_if_extracted, cleaned)
            # Clean up leftover blank lines
            cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip()

        return images, cleaned
|
|
|
|
|
|
|
2026-02-12 10:05:08 -08:00
|
|
|
|
async def send_voice(
|
|
|
|
|
|
self,
|
|
|
|
|
|
chat_id: str,
|
|
|
|
|
|
audio_path: str,
|
|
|
|
|
|
caption: Optional[str] = None,
|
|
|
|
|
|
reply_to: Optional[str] = None,
|
2026-03-11 03:23:53 -07:00
|
|
|
|
**kwargs,
|
2026-02-12 10:05:08 -08:00
|
|
|
|
) -> SendResult:
|
|
|
|
|
|
"""
|
|
|
|
|
|
Send an audio file as a native voice message via the platform API.
|
|
|
|
|
|
|
|
|
|
|
|
Override in subclasses to send audio as voice bubbles (Telegram)
|
|
|
|
|
|
or file attachments (Discord). Default falls back to sending the
|
|
|
|
|
|
file path as text.
|
|
|
|
|
|
"""
|
|
|
|
|
|
text = f"🔊 Audio: {audio_path}"
|
|
|
|
|
|
if caption:
|
|
|
|
|
|
text = f"{caption}\n{text}"
|
|
|
|
|
|
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
|
2026-03-02 16:34:49 -03:00
|
|
|
|
|
2026-03-11 20:16:57 +03:00
|
|
|
|
async def play_tts(
|
|
|
|
|
|
self,
|
|
|
|
|
|
chat_id: str,
|
|
|
|
|
|
audio_path: str,
|
|
|
|
|
|
**kwargs,
|
|
|
|
|
|
) -> SendResult:
|
|
|
|
|
|
"""
|
|
|
|
|
|
Play auto-TTS audio for voice replies.
|
|
|
|
|
|
|
|
|
|
|
|
Override in subclasses for invisible playback (e.g. Web UI).
|
|
|
|
|
|
Default falls back to send_voice (shows audio player).
|
|
|
|
|
|
"""
|
|
|
|
|
|
return await self.send_voice(chat_id=chat_id, audio_path=audio_path, **kwargs)
|
|
|
|
|
|
|
2026-03-02 16:34:49 -03:00
|
|
|
|
async def send_video(
|
|
|
|
|
|
self,
|
|
|
|
|
|
chat_id: str,
|
|
|
|
|
|
video_path: str,
|
|
|
|
|
|
caption: Optional[str] = None,
|
|
|
|
|
|
reply_to: Optional[str] = None,
|
2026-03-11 03:23:53 -07:00
|
|
|
|
**kwargs,
|
2026-03-02 16:34:49 -03:00
|
|
|
|
) -> SendResult:
|
|
|
|
|
|
"""
|
|
|
|
|
|
Send a video natively via the platform API.
|
|
|
|
|
|
|
|
|
|
|
|
Override in subclasses to send videos as inline playable media.
|
|
|
|
|
|
Default falls back to sending the file path as text.
|
|
|
|
|
|
"""
|
|
|
|
|
|
text = f"🎬 Video: {video_path}"
|
|
|
|
|
|
if caption:
|
|
|
|
|
|
text = f"{caption}\n{text}"
|
|
|
|
|
|
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
|
|
|
|
|
|
|
|
|
|
|
|
async def send_document(
|
|
|
|
|
|
self,
|
|
|
|
|
|
chat_id: str,
|
|
|
|
|
|
file_path: str,
|
|
|
|
|
|
caption: Optional[str] = None,
|
|
|
|
|
|
file_name: Optional[str] = None,
|
|
|
|
|
|
reply_to: Optional[str] = None,
|
2026-03-11 03:23:53 -07:00
|
|
|
|
**kwargs,
|
2026-03-02 16:34:49 -03:00
|
|
|
|
) -> SendResult:
|
|
|
|
|
|
"""
|
|
|
|
|
|
Send a document/file natively via the platform API.
|
|
|
|
|
|
|
|
|
|
|
|
Override in subclasses to send files as downloadable attachments.
|
|
|
|
|
|
Default falls back to sending the file path as text.
|
|
|
|
|
|
"""
|
|
|
|
|
|
text = f"📎 File: {file_path}"
|
|
|
|
|
|
if caption:
|
|
|
|
|
|
text = f"{caption}\n{text}"
|
|
|
|
|
|
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
|
|
|
|
|
|
|
|
|
|
|
|
async def send_image_file(
|
|
|
|
|
|
self,
|
|
|
|
|
|
chat_id: str,
|
|
|
|
|
|
image_path: str,
|
|
|
|
|
|
caption: Optional[str] = None,
|
|
|
|
|
|
reply_to: Optional[str] = None,
|
2026-03-11 03:23:53 -07:00
|
|
|
|
**kwargs,
|
2026-03-02 16:34:49 -03:00
|
|
|
|
) -> SendResult:
|
|
|
|
|
|
"""
|
|
|
|
|
|
Send a local image file natively via the platform API.
|
|
|
|
|
|
|
|
|
|
|
|
Unlike send_image() which takes a URL, this takes a local file path.
|
|
|
|
|
|
Override in subclasses for native photo attachments.
|
|
|
|
|
|
Default falls back to sending the file path as text.
|
|
|
|
|
|
"""
|
|
|
|
|
|
text = f"🖼️ Image: {image_path}"
|
|
|
|
|
|
if caption:
|
|
|
|
|
|
text = f"{caption}\n{text}"
|
|
|
|
|
|
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
|
|
|
|
|
|
|
2026-02-12 10:05:08 -08:00
|
|
|
|
    @staticmethod
    def extract_media(content: str) -> Tuple[List[Tuple[str, bool]], str]:
        """
        Extract MEDIA:<path> tags and [[audio_as_voice]] directives from response text.

        The TTS tool returns responses like:
            [[audio_as_voice]]
            MEDIA:/path/to/audio.ogg

        Args:
            content: The response text to scan.

        Returns:
            Tuple of (list of (path, is_voice) pairs, cleaned content with tags removed).
        """
        media = []
        cleaned = content

        # Check for [[audio_as_voice]] directive; it applies to every MEDIA
        # tag found in this response.
        has_voice_tag = "[[audio_as_voice]]" in content
        cleaned = cleaned.replace("[[audio_as_voice]]", "")

        # Extract MEDIA:<path> tags, allowing optional whitespace after the colon
        # and quoted/backticked paths for LLM-formatted outputs. The path
        # alternation tries, in order: backtick-wrapped, double-quoted,
        # single-quoted, an absolute/home path with a known media or document
        # extension (spaces allowed inside), and finally any bare token.
        media_pattern = re.compile(
            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
        )
        for match in media_pattern.finditer(content):
            path = match.group("path").strip()
            # Strip a symmetric quote/backtick wrapper first…
            if len(path) >= 2 and path[0] == path[-1] and path[0] in "`\"'":
                path = path[1:-1].strip()
            # …then any stray leading quotes / trailing punctuation.
            path = path.lstrip("`\"'").rstrip("`\"',.;:)}]")
            if path:
                # expanduser so '~/' paths resolve for the agent process
                media.append((os.path.expanduser(path), has_voice_tag))

        # Remove MEDIA tags from content (including surrounding quote/backtick wrappers)
        if media:
            cleaned = media_pattern.sub('', cleaned)
            cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip()

        return media, cleaned
|
2026-03-17 01:47:34 -07:00
|
|
|
|
|
|
|
|
|
|
    @staticmethod
    def extract_local_files(content: str) -> Tuple[List[str], str]:
        """
        Detect bare local file paths in response text for native media delivery.

        Matches absolute paths (/...) and tilde paths (~/) ending in common
        image or video extensions. Validates each candidate with
        ``os.path.isfile()`` to avoid false positives from URLs or
        non-existent paths.

        Paths inside fenced code blocks (``` ... ```) and inline code
        (`...`) are ignored so that code samples are never mutilated.

        Returns:
            Tuple of (list of expanded file paths, cleaned text with the
            raw path strings removed).
        """
        _LOCAL_MEDIA_EXTS = (
            '.png', '.jpg', '.jpeg', '.gif', '.webp',
            '.mp4', '.mov', '.avi', '.mkv', '.webm',
        )
        ext_part = '|'.join(e.lstrip('.') for e in _LOCAL_MEDIA_EXTS)

        # (?<![/:\w.]) prevents matching inside URLs (e.g. https://…/img.png)
        # and relative paths (./foo.png)
        # (?:~/|/) anchors to absolute or home-relative paths
        path_re = re.compile(
            r'(?<![/:\w.])(?:~/|/)(?:[\w.\-]+/)*[\w.\-]+\.(?:' + ext_part + r')\b',
            re.IGNORECASE,
        )

        # Build spans covered by fenced code blocks and inline code
        code_spans: list = []
        for m in re.finditer(r'```[^\n]*\n.*?```', content, re.DOTALL):
            code_spans.append((m.start(), m.end()))
        for m in re.finditer(r'`[^`\n]+`', content):
            code_spans.append((m.start(), m.end()))

        def _in_code(pos: int) -> bool:
            # True when pos falls inside any recorded code span.
            return any(s <= pos < e for s, e in code_spans)

        found: list = []  # (raw_match_text, expanded_path)
        for match in path_re.finditer(content):
            if _in_code(match.start()):
                continue
            raw = match.group(0)
            expanded = os.path.expanduser(raw)
            # Filesystem check keeps lookalike strings out of the result.
            if os.path.isfile(expanded):
                found.append((raw, expanded))

        # Deduplicate by expanded path, preserving discovery order
        seen: set = set()
        unique: list = []
        for raw, expanded in found:
            if expanded not in seen:
                seen.add(expanded)
                unique.append((raw, expanded))

        paths = [expanded for _, expanded in unique]

        cleaned = content
        if unique:
            # Remove the raw (pre-expansion) text so '~/' forms disappear too.
            for raw, _exp in unique:
                cleaned = cleaned.replace(raw, '')
            cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip()

        return paths, cleaned
|
|
|
|
|
|
|
2026-04-18 21:21:55 -06:00
|
|
|
|
    async def _keep_typing(
        self,
        chat_id: str,
        interval: float = 2.0,
        metadata=None,
        stop_event: asyncio.Event | None = None,
    ) -> None:
        """
        Continuously send typing indicator until cancelled.

        Telegram/Discord typing status expires after ~5 seconds, so we refresh every 2
        to recover quickly after progress messages interrupt it.

        Skips send_typing when the chat is in ``_typing_paused`` (e.g. while
        the agent is waiting for dangerous-command approval). This is critical
        for Slack's Assistant API where ``assistant_threads_setStatus`` disables
        the compose box — pausing lets the user type ``/approve`` or ``/deny``.

        Args:
            chat_id: chat to keep the indicator alive for.
            interval: seconds between refreshes.
            metadata: optional platform context forwarded to send_typing().
            stop_event: optional Event that ends the loop promptly when set;
                without it the loop runs until the task is cancelled.
        """
        try:
            while True:
                # Fast exit if a stop was requested between refreshes.
                if stop_event is not None and stop_event.is_set():
                    return
                if chat_id not in self._typing_paused:
                    await self.send_typing(chat_id, metadata=metadata)
                if stop_event is None:
                    # Legacy mode: plain sleep, rely on task cancellation.
                    await asyncio.sleep(interval)
                    continue
                try:
                    # Sleep OR wake early the moment stop_event is set.
                    await asyncio.wait_for(stop_event.wait(), timeout=interval)
                except asyncio.TimeoutError:
                    continue  # interval elapsed — refresh again
                return  # stop_event fired during the wait
        except asyncio.CancelledError:
            pass  # Normal cancellation when handler completes
        finally:
            # Ensure the underlying platform typing loop is stopped.
            # _keep_typing may have called send_typing() after an outer
            # stop_typing() cleared the task dict, recreating the loop.
            # Cancelling _keep_typing alone won't clean that up.
            if hasattr(self, "stop_typing"):
                try:
                    await self.stop_typing(chat_id)
                except Exception:
                    pass
            self._typing_paused.discard(chat_id)
|
|
|
|
|
|
|
|
|
|
|
|
def pause_typing_for_chat(self, chat_id: str) -> None:
|
|
|
|
|
|
"""Pause typing indicator for a chat (e.g. during approval waits).
|
|
|
|
|
|
|
|
|
|
|
|
Thread-safe (CPython GIL) — can be called from the sync agent thread
|
|
|
|
|
|
while ``_keep_typing`` runs on the async event loop.
|
|
|
|
|
|
"""
|
|
|
|
|
|
self._typing_paused.add(chat_id)
|
|
|
|
|
|
|
|
|
|
|
|
def resume_typing_for_chat(self, chat_id: str) -> None:
|
|
|
|
|
|
"""Resume typing indicator for a chat after approval resolves."""
|
|
|
|
|
|
self._typing_paused.discard(chat_id)
|
|
|
|
|
|
|
2026-04-18 21:21:55 -06:00
|
|
|
|
async def interrupt_session_activity(self, session_key: str, chat_id: str) -> None:
|
|
|
|
|
|
"""Signal the active session loop to stop and clear typing immediately."""
|
|
|
|
|
|
if session_key:
|
|
|
|
|
|
interrupt_event = self._active_sessions.get(session_key)
|
|
|
|
|
|
if interrupt_event is not None:
|
|
|
|
|
|
interrupt_event.set()
|
|
|
|
|
|
try:
|
|
|
|
|
|
await self.stop_typing(chat_id)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
2026-04-19 15:05:14 +05:30
|
|
|
|
def register_post_delivery_callback(
|
|
|
|
|
|
self,
|
|
|
|
|
|
session_key: str,
|
|
|
|
|
|
callback: Callable,
|
|
|
|
|
|
*,
|
|
|
|
|
|
generation: int | None = None,
|
|
|
|
|
|
) -> None:
|
|
|
|
|
|
"""Register a deferred callback to fire after the main response.
|
|
|
|
|
|
|
|
|
|
|
|
``generation`` lets callers tie the callback to a specific gateway run
|
|
|
|
|
|
generation so stale runs cannot clear callbacks owned by a fresher run.
|
|
|
|
|
|
"""
|
|
|
|
|
|
if not session_key or not callable(callback):
|
|
|
|
|
|
return
|
|
|
|
|
|
if generation is None:
|
|
|
|
|
|
self._post_delivery_callbacks[session_key] = callback
|
|
|
|
|
|
else:
|
|
|
|
|
|
self._post_delivery_callbacks[session_key] = (int(generation), callback)
|
|
|
|
|
|
|
|
|
|
|
|
def pop_post_delivery_callback(
|
|
|
|
|
|
self,
|
|
|
|
|
|
session_key: str,
|
|
|
|
|
|
*,
|
|
|
|
|
|
generation: int | None = None,
|
|
|
|
|
|
) -> Callable | None:
|
|
|
|
|
|
"""Pop a deferred callback, optionally requiring generation ownership."""
|
|
|
|
|
|
if not session_key:
|
|
|
|
|
|
return None
|
|
|
|
|
|
entry = self._post_delivery_callbacks.get(session_key)
|
|
|
|
|
|
if entry is None:
|
|
|
|
|
|
return None
|
|
|
|
|
|
if isinstance(entry, tuple) and len(entry) == 2:
|
|
|
|
|
|
entry_generation, callback = entry
|
|
|
|
|
|
if generation is not None and int(entry_generation) != int(generation):
|
|
|
|
|
|
return None
|
|
|
|
|
|
self._post_delivery_callbacks.pop(session_key, None)
|
|
|
|
|
|
return callback if callable(callback) else None
|
|
|
|
|
|
if generation is not None:
|
|
|
|
|
|
return None
|
|
|
|
|
|
self._post_delivery_callbacks.pop(session_key, None)
|
|
|
|
|
|
return entry if callable(entry) else None
|
|
|
|
|
|
|
2026-03-29 21:55:23 -07:00
|
|
|
|
# ── Processing lifecycle hooks ──────────────────────────────────────────
|
|
|
|
|
|
# Subclasses override these to react to message processing events
|
|
|
|
|
|
# (e.g. Discord adds 👀/✅/❌ reactions).
|
|
|
|
|
|
|
|
|
|
|
|
async def on_processing_start(self, event: MessageEvent) -> None:
|
|
|
|
|
|
"""Hook called when background processing begins."""
|
|
|
|
|
|
|
2026-04-08 16:07:07 -07:00
|
|
|
|
async def on_processing_complete(self, event: MessageEvent, outcome: ProcessingOutcome) -> None:
|
2026-03-29 21:55:23 -07:00
|
|
|
|
"""Hook called when background processing completes."""
|
|
|
|
|
|
|
|
|
|
|
|
async def _run_processing_hook(self, hook_name: str, *args: Any, **kwargs: Any) -> None:
|
|
|
|
|
|
"""Run a lifecycle hook without letting failures break message flow."""
|
|
|
|
|
|
hook = getattr(self, hook_name, None)
|
|
|
|
|
|
if not callable(hook):
|
|
|
|
|
|
return
|
|
|
|
|
|
try:
|
|
|
|
|
|
await hook(*args, **kwargs)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.warning("[%s] %s hook failed: %s", self.name, hook_name, e)
|
|
|
|
|
|
|
2026-03-26 17:37:10 -07:00
|
|
|
|
@staticmethod
|
|
|
|
|
|
def _is_retryable_error(error: Optional[str]) -> bool:
|
|
|
|
|
|
"""Return True if the error string looks like a transient network failure."""
|
|
|
|
|
|
if not error:
|
|
|
|
|
|
return False
|
|
|
|
|
|
lowered = error.lower()
|
|
|
|
|
|
return any(pat in lowered for pat in _RETRYABLE_ERROR_PATTERNS)
|
|
|
|
|
|
|
2026-04-04 19:05:34 -07:00
|
|
|
|
@staticmethod
|
|
|
|
|
|
def _is_timeout_error(error: Optional[str]) -> bool:
|
|
|
|
|
|
"""Return True if the error string indicates a read/write timeout.
|
|
|
|
|
|
|
|
|
|
|
|
Timeout errors are NOT retryable and should NOT trigger plain-text
|
|
|
|
|
|
fallback — the request may have already been delivered.
|
|
|
|
|
|
"""
|
|
|
|
|
|
if not error:
|
|
|
|
|
|
return False
|
|
|
|
|
|
lowered = error.lower()
|
|
|
|
|
|
return "timed out" in lowered or "readtimeout" in lowered or "writetimeout" in lowered
|
|
|
|
|
|
|
2026-03-26 17:37:10 -07:00
|
|
|
|
    async def _send_with_retry(
        self,
        chat_id: str,
        content: str,
        reply_to: Optional[str] = None,
        metadata: Any = None,
        max_retries: int = 2,
        base_delay: float = 2.0,
    ) -> "SendResult":
        """
        Send a message with automatic retry for transient network errors.

        On permanent failures (e.g. formatting / permission errors) falls back
        to a plain-text version before giving up. If all attempts fail due to
        network errors, sends the user a brief delivery-failure notice so they
        know to retry rather than waiting indefinitely.

        Args:
            chat_id: target conversation identifier.
            content: message body to deliver.
            reply_to: optional message id to reply to.
            metadata: optional platform-specific context passed to send().
            max_retries: number of additional attempts for transient errors.
            base_delay: backoff base in seconds, doubled each attempt.

        Returns:
            The SendResult of the successful/last attempt, or of the
            plain-text fallback send.
        """
        result = await self.send(
            chat_id=chat_id,
            content=content,
            reply_to=reply_to,
            metadata=metadata,
        )

        if result.success:
            return result

        error_str = result.error or ""
        is_network = result.retryable or self._is_retryable_error(error_str)

        # Timeout errors are not safe to retry (message may have been
        # delivered) and not formatting errors — return the failure as-is.
        if not is_network and self._is_timeout_error(error_str):
            return result

        if is_network:
            # Retry with exponential backoff for transient errors
            for attempt in range(1, max_retries + 1):
                # Up to 1s of random jitter avoids synchronized resends.
                delay = base_delay * (2 ** (attempt - 1)) + random.uniform(0, 1)
                logger.warning(
                    "[%s] Send failed (attempt %d/%d, retrying in %.1fs): %s",
                    self.name, attempt, max_retries, delay, error_str,
                )
                await asyncio.sleep(delay)
                result = await self.send(
                    chat_id=chat_id,
                    content=content,
                    reply_to=reply_to,
                    metadata=metadata,
                )
                if result.success:
                    logger.info("[%s] Send succeeded on retry %d", self.name, attempt)
                    return result
                error_str = result.error or ""
                if not (result.retryable or self._is_retryable_error(error_str)):
                    break  # error switched to non-transient — fall through to plain-text fallback
            else:
                # All retries exhausted (loop completed without break) — notify user
                logger.error("[%s] Failed to deliver response after %d retries: %s", self.name, max_retries, error_str)
                notice = (
                    "\u26a0\ufe0f Message delivery failed after multiple attempts. "
                    "Please try again \u2014 your request was processed but the response could not be sent."
                )
                try:
                    # Best-effort notice; its failure must not mask the result.
                    await self.send(chat_id=chat_id, content=notice, reply_to=reply_to, metadata=metadata)
                except Exception as notify_err:
                    logger.debug("[%s] Could not send delivery-failure notice: %s", self.name, notify_err)
                return result

        # Non-network / post-retry formatting failure: try plain text as fallback
        logger.warning("[%s] Send failed: %s — trying plain-text fallback", self.name, error_str)
        fallback_result = await self.send(
            chat_id=chat_id,
            content=f"(Response formatting failed, plain text:)\n\n{content[:3500]}",
            reply_to=reply_to,
            metadata=metadata,
        )
        if not fallback_result.success:
            logger.error("[%s] Fallback send also failed: %s", self.name, fallback_result.error)
        return fallback_result
|
|
|
|
|
|
|
2026-04-07 14:05:25 -07:00
|
|
|
|
@staticmethod
|
|
|
|
|
|
def _merge_caption(existing_text: Optional[str], new_text: str) -> str:
|
|
|
|
|
|
"""Merge a new caption into existing text, avoiding duplicates.
|
|
|
|
|
|
|
|
|
|
|
|
Uses line-by-line exact match (not substring) to prevent false positives
|
|
|
|
|
|
where a shorter caption is silently dropped because it appears as a
|
|
|
|
|
|
substring of a longer one (e.g. "Meeting" inside "Meeting agenda").
|
|
|
|
|
|
Whitespace is normalised for comparison.
|
|
|
|
|
|
"""
|
|
|
|
|
|
if not existing_text:
|
|
|
|
|
|
return new_text
|
|
|
|
|
|
existing_captions = [c.strip() for c in existing_text.split("\n\n")]
|
|
|
|
|
|
if new_text.strip() not in existing_captions:
|
|
|
|
|
|
return f"{existing_text}\n\n{new_text}".strip()
|
|
|
|
|
|
return existing_text
|
|
|
|
|
|
|
2026-04-23 03:13:08 -07:00
|
|
|
|
# ------------------------------------------------------------------
|
|
|
|
|
|
# Session task + guard ownership helpers
|
|
|
|
|
|
# ------------------------------------------------------------------
|
|
|
|
|
|
# These were introduced together with the _session_tasks owner map to
|
|
|
|
|
|
# make session lifecycle reconciliation deterministic across (a) the
|
|
|
|
|
|
# normal completion path, (b) /stop/ /new/ /reset bypass commands,
|
|
|
|
|
|
# and (c) stale-lock self-heal on the next inbound message.
|
|
|
|
|
|
|
|
|
|
|
|
def _release_session_guard(
|
|
|
|
|
|
self,
|
|
|
|
|
|
session_key: str,
|
|
|
|
|
|
*,
|
|
|
|
|
|
guard: Optional[asyncio.Event] = None,
|
|
|
|
|
|
) -> None:
|
|
|
|
|
|
"""Release the adapter-level guard for a session.
|
|
|
|
|
|
|
|
|
|
|
|
When ``guard`` is provided, only release the entry if it still points
|
|
|
|
|
|
at that exact Event. This lets reset-like commands swap in a temporary
|
|
|
|
|
|
guard while the old processing task unwinds, without having the old
|
|
|
|
|
|
task's cleanup accidentally clear the replacement guard.
|
|
|
|
|
|
"""
|
|
|
|
|
|
current_guard = self._active_sessions.get(session_key)
|
|
|
|
|
|
if current_guard is None:
|
|
|
|
|
|
return
|
|
|
|
|
|
if guard is not None and current_guard is not guard:
|
|
|
|
|
|
return
|
|
|
|
|
|
del self._active_sessions[session_key]
|
|
|
|
|
|
|
|
|
|
|
|
def _session_task_is_stale(self, session_key: str) -> bool:
|
|
|
|
|
|
"""Return True if the owner task for ``session_key`` is done/cancelled.
|
|
|
|
|
|
|
|
|
|
|
|
A lock is "stale" when the adapter still has ``_active_sessions[key]``
|
|
|
|
|
|
AND a known owner task in ``_session_tasks`` that has already exited.
|
|
|
|
|
|
When there is no owner task at all, that usually means the guard was
|
|
|
|
|
|
installed by some path other than handle_message() (tests sometimes
|
|
|
|
|
|
install guards directly) — don't treat that as stale. The on-entry
|
|
|
|
|
|
self-heal only needs to handle the production split-brain case where
|
|
|
|
|
|
an owner task was recorded, then exited without clearing its guard.
|
|
|
|
|
|
"""
|
|
|
|
|
|
task = self._session_tasks.get(session_key)
|
|
|
|
|
|
if task is None:
|
|
|
|
|
|
return False
|
|
|
|
|
|
done = getattr(task, "done", None)
|
|
|
|
|
|
return bool(done and done())
|
|
|
|
|
|
|
|
|
|
|
|
def _heal_stale_session_lock(self, session_key: str) -> bool:
|
|
|
|
|
|
"""Clear a stale session lock if the owner task is already gone.
|
|
|
|
|
|
|
|
|
|
|
|
Returns True if a stale lock was healed. Returns False if there is
|
|
|
|
|
|
no lock, or the owner task is still alive (the normal busy case).
|
|
|
|
|
|
|
|
|
|
|
|
This is the on-entry safety net sidbin's issue #11016 analysis calls
|
|
|
|
|
|
for: without it, a split-brain — adapter still thinks the session is
|
|
|
|
|
|
active, but nothing is actually processing — traps the chat in
|
|
|
|
|
|
infinite "Interrupting current task..." until the gateway is
|
|
|
|
|
|
restarted.
|
|
|
|
|
|
"""
|
|
|
|
|
|
if session_key not in self._active_sessions:
|
|
|
|
|
|
return False
|
|
|
|
|
|
if not self._session_task_is_stale(session_key):
|
|
|
|
|
|
return False
|
|
|
|
|
|
logger.warning(
|
|
|
|
|
|
"[%s] Healing stale session lock for %s (owner task is done/absent)",
|
|
|
|
|
|
self.name,
|
|
|
|
|
|
session_key,
|
|
|
|
|
|
)
|
|
|
|
|
|
self._active_sessions.pop(session_key, None)
|
|
|
|
|
|
self._pending_messages.pop(session_key, None)
|
|
|
|
|
|
self._session_tasks.pop(session_key, None)
|
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
|
def _start_session_processing(
|
|
|
|
|
|
self,
|
|
|
|
|
|
event: MessageEvent,
|
|
|
|
|
|
session_key: str,
|
|
|
|
|
|
*,
|
|
|
|
|
|
interrupt_event: Optional[asyncio.Event] = None,
|
|
|
|
|
|
) -> bool:
|
|
|
|
|
|
"""Spawn a background processing task under the given session guard.
|
|
|
|
|
|
|
|
|
|
|
|
Returns True on success. If the runtime stubs ``create_task`` with a
|
|
|
|
|
|
non-Task sentinel (some tests do this), the guard is rolled back and
|
|
|
|
|
|
False is returned so the caller isn't left holding a half-installed
|
|
|
|
|
|
session lock.
|
|
|
|
|
|
"""
|
|
|
|
|
|
guard = interrupt_event or asyncio.Event()
|
|
|
|
|
|
self._active_sessions[session_key] = guard
|
|
|
|
|
|
|
|
|
|
|
|
task = asyncio.create_task(self._process_message_background(event, session_key))
|
|
|
|
|
|
self._session_tasks[session_key] = task
|
|
|
|
|
|
try:
|
|
|
|
|
|
self._background_tasks.add(task)
|
|
|
|
|
|
except TypeError:
|
|
|
|
|
|
# Tests stub create_task() with lightweight sentinels that are not
|
|
|
|
|
|
# hashable and do not support lifecycle callbacks.
|
|
|
|
|
|
self._session_tasks.pop(session_key, None)
|
|
|
|
|
|
self._release_session_guard(session_key, guard=guard)
|
|
|
|
|
|
return False
|
|
|
|
|
|
if hasattr(task, "add_done_callback"):
|
|
|
|
|
|
task.add_done_callback(self._background_tasks.discard)
|
|
|
|
|
|
task.add_done_callback(self._expected_cancelled_tasks.discard)
|
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
|
async def cancel_session_processing(
|
|
|
|
|
|
self,
|
|
|
|
|
|
session_key: str,
|
|
|
|
|
|
*,
|
|
|
|
|
|
release_guard: bool = True,
|
|
|
|
|
|
discard_pending: bool = True,
|
|
|
|
|
|
) -> None:
|
|
|
|
|
|
"""Cancel in-flight processing for a single session.
|
|
|
|
|
|
|
|
|
|
|
|
``release_guard=False`` keeps the adapter-level session guard in place
|
|
|
|
|
|
so reset-like commands can finish atomically before follow-up messages
|
|
|
|
|
|
are allowed to start a fresh background task.
|
|
|
|
|
|
"""
|
|
|
|
|
|
task = self._session_tasks.pop(session_key, None)
|
|
|
|
|
|
if task is not None and not task.done():
|
|
|
|
|
|
logger.debug(
|
|
|
|
|
|
"[%s] Cancelling active processing for session %s",
|
|
|
|
|
|
self.name,
|
|
|
|
|
|
session_key,
|
|
|
|
|
|
)
|
|
|
|
|
|
self._expected_cancelled_tasks.add(task)
|
|
|
|
|
|
task.cancel()
|
|
|
|
|
|
try:
|
|
|
|
|
|
await task
|
|
|
|
|
|
except asyncio.CancelledError:
|
|
|
|
|
|
pass
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
logger.debug(
|
|
|
|
|
|
"[%s] Session cancellation raised while unwinding %s",
|
|
|
|
|
|
self.name,
|
|
|
|
|
|
session_key,
|
|
|
|
|
|
exc_info=True,
|
|
|
|
|
|
)
|
|
|
|
|
|
if discard_pending:
|
|
|
|
|
|
self._pending_messages.pop(session_key, None)
|
|
|
|
|
|
if release_guard:
|
|
|
|
|
|
self._release_session_guard(session_key)
|
|
|
|
|
|
|
|
|
|
|
|
async def _drain_pending_after_session_command(
|
|
|
|
|
|
self,
|
|
|
|
|
|
session_key: str,
|
|
|
|
|
|
command_guard: asyncio.Event,
|
|
|
|
|
|
) -> None:
|
|
|
|
|
|
"""Resume the latest queued follow-up once a session command completes.
|
|
|
|
|
|
|
|
|
|
|
|
Called at the tail of /stop, /new, and /reset dispatch. Releases the
|
|
|
|
|
|
command-scoped guard, then — if a follow-up message landed while the
|
|
|
|
|
|
command was running — spawns a fresh processing task for it.
|
|
|
|
|
|
"""
|
|
|
|
|
|
pending_event = self._pending_messages.pop(session_key, None)
|
|
|
|
|
|
self._release_session_guard(session_key, guard=command_guard)
|
|
|
|
|
|
if pending_event is None:
|
|
|
|
|
|
return
|
|
|
|
|
|
self._start_session_processing(pending_event, session_key)
|
|
|
|
|
|
|
|
|
|
|
|
    async def _dispatch_active_session_command(
        self,
        event: MessageEvent,
        session_key: str,
        cmd: str,
    ) -> None:
        """Dispatch a reset-like bypass command while preserving guard ordering.

        Args:
            event: The inbound message carrying the command.
            session_key: Session the command targets.
            cmd: Bare command name ("stop", "new", or "reset").

        /stop, /new, and /reset must:
        1. Keep the session guard installed while the runner processes the
           command (so a racing follow-up message stays queued, not
           dispatched as a second parallel run).
        2. Cancel the old in-flight adapter task only AFTER the runner has
           finished handling the command (so the runner sees consistent
           state and its response is sent in order).
        3. Release the command-scoped guard and drain the latest queued
           follow-up exactly once, after 1 and 2 complete.

        Raises:
            Re-raises whatever the runner or send path raised, after
            restoring or releasing the guard (see except block below).
        """
        logger.debug(
            "[%s] Command '/%s' bypassing active-session guard for %s",
            self.name,
            cmd,
            session_key,
        )

        # Swap in a command-scoped guard; remember the old one so it can be
        # restored if dispatch fails while the original task is still alive.
        current_guard = self._active_sessions.get(session_key)
        command_guard = asyncio.Event()
        self._active_sessions[session_key] = command_guard
        # Forum/thread routing: forwarded so the reply lands in the right topic.
        thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None

        try:
            response = await self._message_handler(event)
            # Old adapter task (if any) is cancelled AFTER the runner has
            # fully handled the command — keeps ordering deterministic.
            # release_guard=False: command_guard must stay installed until
            # the drain step; discard_pending=False: a racing follow-up is
            # kept so it can be resumed below.
            await self.cancel_session_processing(
                session_key,
                release_guard=False,
                discard_pending=False,
            )
            if response:
                await self._send_with_retry(
                    chat_id=event.source.chat_id,
                    content=response,
                    reply_to=event.message_id,
                    metadata=thread_meta,
                )
        except Exception:
            # On failure, restore the original guard if one still exists so
            # we don't leave the session in a half-reset state.
            if self._active_sessions.get(session_key) is command_guard:
                # Restore only when the old owner task is still registered —
                # otherwise there is nothing left to guard, so release.
                if session_key in self._session_tasks and current_guard is not None:
                    self._active_sessions[session_key] = current_guard
                else:
                    self._release_session_guard(session_key, guard=command_guard)
            raise

        # Step 3: release command_guard and start the newest queued follow-up.
        await self._drain_pending_after_session_command(session_key, command_guard)
|
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
|
    async def handle_message(self, event: MessageEvent) -> None:
        """
        Process an incoming message.

        This method returns quickly by spawning background tasks.
        This allows new messages to be processed even while an agent is running,
        enabling interruption support.

        Dispatch order:
        1. No handler registered → drop silently.
        2. Heal a stale session lock (owner task dead but guard present).
        3. Session active → bypass commands dispatch inline; photos queue
           without interrupt; any other message queues + signals interrupt.
        4. Session idle → install guard and spawn the background task.
        """
        if not self._message_handler:
            return

        session_key = build_session_key(
            event.source,
            group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
            thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False),
        )

        # On-entry self-heal: if the adapter still has an _active_sessions
        # entry for this key but the owner task has already exited (done or
        # cancelled), the lock is stale. Clear it and fall through to
        # normal dispatch so the user isn't trapped behind a dead guard —
        # this is the split-brain tail described in issue #11016.
        if session_key in self._active_sessions:
            self._heal_stale_session_lock(session_key)

        # Check if there's already an active handler for this session
        if session_key in self._active_sessions:
            # Certain commands must bypass the active-session guard and be
            # dispatched directly to the gateway runner. Without this, they
            # are queued as pending messages and either:
            # - leak into the conversation as user text (/stop, /new), or
            # - deadlock (/approve, /deny — agent is blocked on Event.wait)
            #
            # Dispatch inline: call the message handler directly and send the
            # response. Do NOT use _process_message_background — it manages
            # session lifecycle and its cleanup races with the running task
            # (see PR #4926).
            cmd = event.get_command()
            # Local import avoids a module-level import cycle with the CLI
            # command registry — TODO confirm the cycle still exists.
            from hermes_cli.commands import should_bypass_active_session

            if should_bypass_active_session(cmd):
                # /stop, /new, /reset must cancel the in-flight adapter task
                # and preserve ordering of queued follow-ups. Route those
                # through the dedicated handoff path that serializes
                # cancellation + runner response + pending drain.
                if cmd in ("stop", "new", "reset"):
                    try:
                        await self._dispatch_active_session_command(event, session_key, cmd)
                    except Exception as e:
                        logger.error(
                            "[%s] Command '/%s' dispatch failed: %s",
                            self.name, cmd, e, exc_info=True,
                        )
                    return

                # Other bypass commands (/approve, /deny, /status,
                # /background, /restart) just need direct dispatch — they
                # don't cancel the running task.
                logger.debug(
                    "[%s] Command '/%s' bypassing active-session guard for %s",
                    self.name, cmd, session_key,
                )
                try:
                    # Forum-topic routing metadata (Telegram thread replies).
                    _thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
                    response = await self._message_handler(event)
                    if response:
                        await self._send_with_retry(
                            chat_id=event.source.chat_id,
                            content=response,
                            reply_to=event.message_id,
                            metadata=_thread_meta,
                        )
                except Exception as e:
                    logger.error("[%s] Command '/%s' dispatch failed: %s", self.name, cmd, e, exc_info=True)
                return

            # Optional pluggable policy: a truthy return means the handler
            # fully consumed the busy-session message; errors fall through
            # to the default queue/interrupt behavior below.
            if self._busy_session_handler is not None:
                try:
                    if await self._busy_session_handler(event, session_key):
                        return
                except Exception as e:
                    logger.error("[%s] Busy-session handler failed: %s", self.name, e, exc_info=True)

            # Special case: photo bursts/albums frequently arrive as multiple near-
            # simultaneous messages. Queue them without interrupting the active run,
            # then process them immediately after the current task finishes.
            if event.message_type == MessageType.PHOTO:
                logger.debug("[%s] Queuing photo follow-up for session %s without interrupt", self.name, session_key)
                merge_pending_message_event(self._pending_messages, session_key, event)
                return  # Don't interrupt now - will run after current task completes

            # Default behavior for non-photo follow-ups: interrupt the running agent
            logger.debug("[%s] New message while session %s is active — triggering interrupt", self.name, session_key)
            # Only the newest follow-up is kept; an earlier pending message
            # for this session is overwritten.
            self._pending_messages[session_key] = event
            # Signal the interrupt (the processing task checks this)
            self._active_sessions[session_key].set()
            return  # Don't process now - will be handled after current task finishes

        # Mark session as active BEFORE spawning background task to close
        # the race window where a second message arriving before the task
        # starts would also pass the _active_sessions check and spawn a
        # duplicate task. (grammY sequentialize / aiogram EventIsolation
        # pattern — set the guard synchronously, not inside the task.)
        # _start_session_processing installs the guard AND the owner-task
        # mapping atomically so stale-lock detection works.
        self._start_session_processing(event, session_key)
|
2026-02-03 20:10:15 -08:00
|
|
|
|
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
|
@staticmethod
|
|
|
|
|
|
def _get_human_delay() -> float:
|
|
|
|
|
|
"""
|
|
|
|
|
|
Return a random delay in seconds for human-like response pacing.
|
|
|
|
|
|
|
|
|
|
|
|
Reads from env vars:
|
|
|
|
|
|
HERMES_HUMAN_DELAY_MODE: "off" (default) | "natural" | "custom"
|
|
|
|
|
|
HERMES_HUMAN_DELAY_MIN_MS: minimum delay in ms (default 800, custom mode)
|
|
|
|
|
|
HERMES_HUMAN_DELAY_MAX_MS: maximum delay in ms (default 2500, custom mode)
|
|
|
|
|
|
"""
|
|
|
|
|
|
mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower()
|
|
|
|
|
|
if mode == "off":
|
|
|
|
|
|
return 0.0
|
|
|
|
|
|
min_ms = int(os.getenv("HERMES_HUMAN_DELAY_MIN_MS", "800"))
|
|
|
|
|
|
max_ms = int(os.getenv("HERMES_HUMAN_DELAY_MAX_MS", "2500"))
|
|
|
|
|
|
if mode == "natural":
|
|
|
|
|
|
min_ms, max_ms = 800, 2500
|
|
|
|
|
|
return random.uniform(min_ms / 1000.0, max_ms / 1000.0)
|
|
|
|
|
|
|
2026-02-03 20:10:15 -08:00
|
|
|
|
async def _process_message_background(self, event: MessageEvent, session_key: str) -> None:
|
|
|
|
|
|
"""Background task that actually processes the message."""
|
2026-03-29 21:55:23 -07:00
|
|
|
|
# Track delivery outcomes for the processing-complete hook
|
|
|
|
|
|
delivery_attempted = False
|
|
|
|
|
|
delivery_succeeded = False
|
|
|
|
|
|
|
|
|
|
|
|
def _record_delivery(result):
|
|
|
|
|
|
nonlocal delivery_attempted, delivery_succeeded
|
|
|
|
|
|
if result is None:
|
|
|
|
|
|
return
|
|
|
|
|
|
delivery_attempted = True
|
|
|
|
|
|
if getattr(result, "success", False):
|
|
|
|
|
|
delivery_succeeded = True
|
|
|
|
|
|
|
2026-04-02 16:32:21 +05:30
|
|
|
|
# Reuse the interrupt event set by handle_message() (which marks
|
|
|
|
|
|
# the session active before spawning this task to prevent races).
|
|
|
|
|
|
# Fall back to a new Event only if the entry was removed externally.
|
|
|
|
|
|
interrupt_event = self._active_sessions.get(session_key) or asyncio.Event()
|
2026-02-03 20:10:15 -08:00
|
|
|
|
self._active_sessions[session_key] = interrupt_event
|
2026-04-19 15:05:14 +05:30
|
|
|
|
callback_generation = getattr(interrupt_event, "_hermes_run_generation", None)
|
2026-02-03 20:10:15 -08:00
|
|
|
|
|
2026-02-03 15:42:54 -08:00
|
|
|
|
# Start continuous typing indicator (refreshes every 2 seconds)
|
fix: forward thread_id metadata for Telegram forum topic routing
Replies in Telegram forum topics (supergroups with topics) now land in
the correct topic thread instead of 'General'.
- base.py: build thread_id metadata from event.source, pass to all
send/media calls; add metadata param to send_typing, send_image,
send_animation, send_voice, send_video, send_document, send_image_file,
_keep_typing
- telegram.py: extract thread_id from metadata and pass as
message_thread_id to all Bot API calls (send_photo, send_voice,
send_audio, send_animation, send_chat_action)
- run.py: pass thread_id metadata to progress/streaming send calls
- discord/slack/whatsapp/homeassistant: update send_typing signature
Based on the fix proposed by @Bitstreamono in PR #656.
2026-03-10 06:21:15 -07:00
|
|
|
|
_thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None
|
2026-04-18 21:32:49 -06:00
|
|
|
|
_keep_typing_kwargs = {"metadata": _thread_metadata}
|
|
|
|
|
|
try:
|
|
|
|
|
|
_keep_typing_sig = inspect.signature(self._keep_typing)
|
|
|
|
|
|
except (TypeError, ValueError):
|
|
|
|
|
|
_keep_typing_sig = None
|
|
|
|
|
|
if _keep_typing_sig is None or "stop_event" in _keep_typing_sig.parameters:
|
|
|
|
|
|
_keep_typing_kwargs["stop_event"] = interrupt_event
|
2026-04-18 21:21:55 -06:00
|
|
|
|
typing_task = asyncio.create_task(
|
|
|
|
|
|
self._keep_typing(
|
|
|
|
|
|
event.source.chat_id,
|
2026-04-18 21:32:49 -06:00
|
|
|
|
**_keep_typing_kwargs,
|
2026-04-18 21:21:55 -06:00
|
|
|
|
)
|
|
|
|
|
|
)
|
2026-02-03 14:51:31 -08:00
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
|
try:
|
2026-03-29 21:55:23 -07:00
|
|
|
|
await self._run_processing_hook("on_processing_start", event)
|
|
|
|
|
|
|
2026-02-03 14:51:31 -08:00
|
|
|
|
# Call the handler (this can take a while with tool calls)
|
2026-02-02 19:01:51 -08:00
|
|
|
|
response = await self._message_handler(event)
|
|
|
|
|
|
|
2026-04-02 16:34:39 +05:30
|
|
|
|
# Send response if any. A None/empty response is normal when
|
|
|
|
|
|
# streaming already delivered the text (already_sent=True) or
|
|
|
|
|
|
# when the message was queued behind an active agent. Log at
|
|
|
|
|
|
# DEBUG to avoid noisy warnings for expected behavior.
|
2026-04-15 03:31:08 -07:00
|
|
|
|
#
|
|
|
|
|
|
# Suppress stale response when the session was interrupted by a
|
|
|
|
|
|
# new message that hasn't been consumed yet. The pending message
|
|
|
|
|
|
# is processed by the pending-message handler below (#8221/#2483).
|
|
|
|
|
|
if (
|
|
|
|
|
|
response
|
|
|
|
|
|
and interrupt_event.is_set()
|
|
|
|
|
|
and session_key in self._pending_messages
|
|
|
|
|
|
):
|
|
|
|
|
|
logger.info(
|
|
|
|
|
|
"[%s] Suppressing stale response for interrupted session %s",
|
|
|
|
|
|
self.name,
|
|
|
|
|
|
session_key,
|
|
|
|
|
|
)
|
|
|
|
|
|
response = None
|
2026-02-25 21:04:36 -08:00
|
|
|
|
if not response:
|
2026-04-02 16:34:39 +05:30
|
|
|
|
logger.debug("[%s] Handler returned empty/None response for %s", self.name, event.source.chat_id)
|
2026-02-02 19:01:51 -08:00
|
|
|
|
if response:
|
2026-02-12 10:05:08 -08:00
|
|
|
|
# Extract MEDIA:<path> tags (from TTS tool) before other processing
|
|
|
|
|
|
media_files, response = self.extract_media(response)
|
|
|
|
|
|
|
2026-02-10 21:02:40 -08:00
|
|
|
|
# Extract image URLs and send them as native platform attachments
|
|
|
|
|
|
images, text_content = self.extract_images(response)
|
2026-03-17 01:47:35 -07:00
|
|
|
|
# Strip any remaining internal directives from message body (fixes #1561)
|
|
|
|
|
|
text_content = text_content.replace("[[audio_as_voice]]", "").strip()
|
|
|
|
|
|
text_content = re.sub(r"MEDIA:\s*\S+", "", text_content).strip()
|
2026-03-07 21:24:47 -08:00
|
|
|
|
if images:
|
|
|
|
|
|
logger.info("[%s] extract_images found %d image(s) in response (%d chars)", self.name, len(images), len(response))
|
2026-03-17 01:47:34 -07:00
|
|
|
|
|
|
|
|
|
|
# Auto-detect bare local file paths for native media delivery
|
|
|
|
|
|
# (helps small models that don't use MEDIA: syntax)
|
|
|
|
|
|
local_files, text_content = self.extract_local_files(text_content)
|
|
|
|
|
|
if local_files:
|
|
|
|
|
|
logger.info("[%s] extract_local_files found %d file(s) in response", self.name, len(local_files))
|
2026-02-03 15:42:54 -08:00
|
|
|
|
|
2026-03-11 20:16:57 +03:00
|
|
|
|
# Auto-TTS: if voice message, generate audio FIRST (before sending text)
|
2026-03-14 10:31:49 +03:00
|
|
|
|
# Skipped when the chat has voice mode disabled (/voice off)
|
2026-03-11 20:16:57 +03:00
|
|
|
|
_tts_path = None
|
2026-03-14 10:31:49 +03:00
|
|
|
|
if (event.message_type == MessageType.VOICE
|
|
|
|
|
|
and text_content
|
|
|
|
|
|
and not media_files
|
|
|
|
|
|
and event.source.chat_id not in self._auto_tts_disabled_chats):
|
2026-03-11 20:16:57 +03:00
|
|
|
|
try:
|
|
|
|
|
|
from tools.tts_tool import text_to_speech_tool, check_tts_requirements
|
|
|
|
|
|
if check_tts_requirements():
|
|
|
|
|
|
import json as _json
|
2026-03-11 23:36:47 +03:00
|
|
|
|
speech_text = re.sub(r'[*_`#\[\]()]', '', text_content)[:4000].strip()
|
|
|
|
|
|
if not speech_text:
|
|
|
|
|
|
raise ValueError("Empty text after markdown cleanup")
|
2026-03-11 20:16:57 +03:00
|
|
|
|
tts_result_str = await asyncio.to_thread(
|
|
|
|
|
|
text_to_speech_tool, text=speech_text
|
|
|
|
|
|
)
|
|
|
|
|
|
tts_data = _json.loads(tts_result_str)
|
|
|
|
|
|
_tts_path = tts_data.get("file_path")
|
|
|
|
|
|
except Exception as tts_err:
|
|
|
|
|
|
logger.warning("[%s] Auto-TTS failed: %s", self.name, tts_err)
|
|
|
|
|
|
|
|
|
|
|
|
# Play TTS audio before text (voice-first experience)
|
|
|
|
|
|
if _tts_path and Path(_tts_path).exists():
|
2026-03-11 23:57:42 +03:00
|
|
|
|
try:
|
|
|
|
|
|
await self.play_tts(
|
|
|
|
|
|
chat_id=event.source.chat_id,
|
|
|
|
|
|
audio_path=_tts_path,
|
|
|
|
|
|
metadata=_thread_metadata,
|
|
|
|
|
|
)
|
|
|
|
|
|
finally:
|
|
|
|
|
|
try:
|
|
|
|
|
|
os.remove(_tts_path)
|
|
|
|
|
|
except OSError:
|
|
|
|
|
|
pass
|
2026-03-11 20:16:57 +03:00
|
|
|
|
|
|
|
|
|
|
# Send the text portion
|
2026-02-10 21:02:40 -08:00
|
|
|
|
if text_content:
|
2026-02-25 21:04:36 -08:00
|
|
|
|
logger.info("[%s] Sending response (%d chars) to %s", self.name, len(text_content), event.source.chat_id)
|
2026-03-26 17:37:10 -07:00
|
|
|
|
result = await self._send_with_retry(
|
2026-02-03 15:42:54 -08:00
|
|
|
|
chat_id=event.source.chat_id,
|
2026-02-10 21:02:40 -08:00
|
|
|
|
content=text_content,
|
fix: forward thread_id metadata for Telegram forum topic routing
Replies in Telegram forum topics (supergroups with topics) now land in
the correct topic thread instead of 'General'.
- base.py: build thread_id metadata from event.source, pass to all
send/media calls; add metadata param to send_typing, send_image,
send_animation, send_voice, send_video, send_document, send_image_file,
_keep_typing
- telegram.py: extract thread_id from metadata and pass as
message_thread_id to all Bot API calls (send_photo, send_voice,
send_audio, send_animation, send_chat_action)
- run.py: pass thread_id metadata to progress/streaming send calls
- discord/slack/whatsapp/homeassistant: update send_typing signature
Based on the fix proposed by @Bitstreamono in PR #656.
2026-03-10 06:21:15 -07:00
|
|
|
|
reply_to=event.message_id,
|
|
|
|
|
|
metadata=_thread_metadata,
|
2026-02-03 15:42:54 -08:00
|
|
|
|
)
|
2026-03-29 21:55:23 -07:00
|
|
|
|
_record_delivery(result)
|
2026-03-11 20:16:57 +03:00
|
|
|
|
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
|
# Human-like pacing delay between text and media
|
|
|
|
|
|
human_delay = self._get_human_delay()
|
2026-03-11 20:16:57 +03:00
|
|
|
|
|
2026-02-10 21:02:40 -08:00
|
|
|
|
# Send extracted images as native attachments
|
2026-03-07 21:24:47 -08:00
|
|
|
|
if images:
|
|
|
|
|
|
logger.info("[%s] Extracted %d image(s) to send as attachments", self.name, len(images))
|
2026-02-10 21:02:40 -08:00
|
|
|
|
for image_url, alt_text in images:
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
|
if human_delay > 0:
|
|
|
|
|
|
await asyncio.sleep(human_delay)
|
2026-02-10 21:02:40 -08:00
|
|
|
|
try:
|
2026-04-06 23:27:54 +03:00
|
|
|
|
logger.info(
|
|
|
|
|
|
"[%s] Sending image: %s (alt=%s)",
|
|
|
|
|
|
self.name,
|
2026-04-10 05:02:17 -07:00
|
|
|
|
safe_url_for_log(image_url),
|
2026-04-06 23:27:54 +03:00
|
|
|
|
alt_text[:30] if alt_text else "",
|
|
|
|
|
|
)
|
2026-02-28 11:25:44 -08:00
|
|
|
|
# Route animated GIFs through send_animation for proper playback
|
|
|
|
|
|
if self._is_animation_url(image_url):
|
|
|
|
|
|
img_result = await self.send_animation(
|
|
|
|
|
|
chat_id=event.source.chat_id,
|
|
|
|
|
|
animation_url=image_url,
|
|
|
|
|
|
caption=alt_text if alt_text else None,
|
fix: forward thread_id metadata for Telegram forum topic routing
Replies in Telegram forum topics (supergroups with topics) now land in
the correct topic thread instead of 'General'.
- base.py: build thread_id metadata from event.source, pass to all
send/media calls; add metadata param to send_typing, send_image,
send_animation, send_voice, send_video, send_document, send_image_file,
_keep_typing
- telegram.py: extract thread_id from metadata and pass as
message_thread_id to all Bot API calls (send_photo, send_voice,
send_audio, send_animation, send_chat_action)
- run.py: pass thread_id metadata to progress/streaming send calls
- discord/slack/whatsapp/homeassistant: update send_typing signature
Based on the fix proposed by @Bitstreamono in PR #656.
2026-03-10 06:21:15 -07:00
|
|
|
|
metadata=_thread_metadata,
|
2026-02-28 11:25:44 -08:00
|
|
|
|
)
|
|
|
|
|
|
else:
|
|
|
|
|
|
img_result = await self.send_image(
|
|
|
|
|
|
chat_id=event.source.chat_id,
|
|
|
|
|
|
image_url=image_url,
|
|
|
|
|
|
caption=alt_text if alt_text else None,
|
fix: forward thread_id metadata for Telegram forum topic routing
Replies in Telegram forum topics (supergroups with topics) now land in
the correct topic thread instead of 'General'.
- base.py: build thread_id metadata from event.source, pass to all
send/media calls; add metadata param to send_typing, send_image,
send_animation, send_voice, send_video, send_document, send_image_file,
_keep_typing
- telegram.py: extract thread_id from metadata and pass as
message_thread_id to all Bot API calls (send_photo, send_voice,
send_audio, send_animation, send_chat_action)
- run.py: pass thread_id metadata to progress/streaming send calls
- discord/slack/whatsapp/homeassistant: update send_typing signature
Based on the fix proposed by @Bitstreamono in PR #656.
2026-03-10 06:21:15 -07:00
|
|
|
|
metadata=_thread_metadata,
|
2026-02-28 11:25:44 -08:00
|
|
|
|
)
|
2026-02-10 21:02:40 -08:00
|
|
|
|
if not img_result.success:
|
2026-03-07 21:24:47 -08:00
|
|
|
|
logger.error("[%s] Failed to send image: %s", self.name, img_result.error)
|
2026-02-10 21:02:40 -08:00
|
|
|
|
except Exception as img_err:
|
2026-03-07 21:24:47 -08:00
|
|
|
|
logger.error("[%s] Error sending image: %s", self.name, img_err, exc_info=True)
|
2026-03-11 20:16:57 +03:00
|
|
|
|
|
2026-03-02 16:34:49 -03:00
|
|
|
|
# Send extracted media files — route by file type
|
|
|
|
|
|
_AUDIO_EXTS = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'}
|
2026-03-17 01:47:34 -07:00
|
|
|
|
_VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'}
|
2026-03-02 16:34:49 -03:00
|
|
|
|
_IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}
|
|
|
|
|
|
|
|
|
|
|
|
for media_path, is_voice in media_files:
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
|
if human_delay > 0:
|
|
|
|
|
|
await asyncio.sleep(human_delay)
|
2026-02-12 10:05:08 -08:00
|
|
|
|
try:
|
2026-03-05 08:35:13 -08:00
|
|
|
|
ext = Path(media_path).suffix.lower()
|
2026-03-02 16:34:49 -03:00
|
|
|
|
if ext in _AUDIO_EXTS:
|
|
|
|
|
|
media_result = await self.send_voice(
|
|
|
|
|
|
chat_id=event.source.chat_id,
|
|
|
|
|
|
audio_path=media_path,
|
fix: forward thread_id metadata for Telegram forum topic routing
Replies in Telegram forum topics (supergroups with topics) now land in
the correct topic thread instead of 'General'.
- base.py: build thread_id metadata from event.source, pass to all
send/media calls; add metadata param to send_typing, send_image,
send_animation, send_voice, send_video, send_document, send_image_file,
_keep_typing
- telegram.py: extract thread_id from metadata and pass as
message_thread_id to all Bot API calls (send_photo, send_voice,
send_audio, send_animation, send_chat_action)
- run.py: pass thread_id metadata to progress/streaming send calls
- discord/slack/whatsapp/homeassistant: update send_typing signature
Based on the fix proposed by @Bitstreamono in PR #656.
2026-03-10 06:21:15 -07:00
|
|
|
|
metadata=_thread_metadata,
|
2026-03-02 16:34:49 -03:00
|
|
|
|
)
|
|
|
|
|
|
elif ext in _VIDEO_EXTS:
|
|
|
|
|
|
media_result = await self.send_video(
|
|
|
|
|
|
chat_id=event.source.chat_id,
|
|
|
|
|
|
video_path=media_path,
|
fix: forward thread_id metadata for Telegram forum topic routing
Replies in Telegram forum topics (supergroups with topics) now land in
the correct topic thread instead of 'General'.
- base.py: build thread_id metadata from event.source, pass to all
send/media calls; add metadata param to send_typing, send_image,
send_animation, send_voice, send_video, send_document, send_image_file,
_keep_typing
- telegram.py: extract thread_id from metadata and pass as
message_thread_id to all Bot API calls (send_photo, send_voice,
send_audio, send_animation, send_chat_action)
- run.py: pass thread_id metadata to progress/streaming send calls
- discord/slack/whatsapp/homeassistant: update send_typing signature
Based on the fix proposed by @Bitstreamono in PR #656.
2026-03-10 06:21:15 -07:00
|
|
|
|
metadata=_thread_metadata,
|
2026-03-02 16:34:49 -03:00
|
|
|
|
)
|
|
|
|
|
|
elif ext in _IMAGE_EXTS:
|
|
|
|
|
|
media_result = await self.send_image_file(
|
|
|
|
|
|
chat_id=event.source.chat_id,
|
|
|
|
|
|
image_path=media_path,
|
fix: forward thread_id metadata for Telegram forum topic routing
Replies in Telegram forum topics (supergroups with topics) now land in
the correct topic thread instead of 'General'.
- base.py: build thread_id metadata from event.source, pass to all
send/media calls; add metadata param to send_typing, send_image,
send_animation, send_voice, send_video, send_document, send_image_file,
_keep_typing
- telegram.py: extract thread_id from metadata and pass as
message_thread_id to all Bot API calls (send_photo, send_voice,
send_audio, send_animation, send_chat_action)
- run.py: pass thread_id metadata to progress/streaming send calls
- discord/slack/whatsapp/homeassistant: update send_typing signature
Based on the fix proposed by @Bitstreamono in PR #656.
2026-03-10 06:21:15 -07:00
|
|
|
|
metadata=_thread_metadata,
|
2026-03-02 16:34:49 -03:00
|
|
|
|
)
|
|
|
|
|
|
else:
|
|
|
|
|
|
media_result = await self.send_document(
|
|
|
|
|
|
chat_id=event.source.chat_id,
|
|
|
|
|
|
file_path=media_path,
|
fix: forward thread_id metadata for Telegram forum topic routing
Replies in Telegram forum topics (supergroups with topics) now land in
the correct topic thread instead of 'General'.
- base.py: build thread_id metadata from event.source, pass to all
send/media calls; add metadata param to send_typing, send_image,
send_animation, send_voice, send_video, send_document, send_image_file,
_keep_typing
- telegram.py: extract thread_id from metadata and pass as
message_thread_id to all Bot API calls (send_photo, send_voice,
send_audio, send_animation, send_chat_action)
- run.py: pass thread_id metadata to progress/streaming send calls
- discord/slack/whatsapp/homeassistant: update send_typing signature
Based on the fix proposed by @Bitstreamono in PR #656.
2026-03-10 06:21:15 -07:00
|
|
|
|
metadata=_thread_metadata,
|
2026-03-02 16:34:49 -03:00
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
if not media_result.success:
|
2026-03-28 22:25:35 -07:00
|
|
|
|
logger.warning("[%s] Failed to send media (%s): %s", self.name, ext, media_result.error)
|
2026-03-02 16:34:49 -03:00
|
|
|
|
except Exception as media_err:
|
2026-03-28 22:25:35 -07:00
|
|
|
|
logger.warning("[%s] Error sending media: %s", self.name, media_err)
|
2026-03-17 01:47:34 -07:00
|
|
|
|
|
|
|
|
|
|
# Send auto-detected local files as native attachments
|
|
|
|
|
|
for file_path in local_files:
|
|
|
|
|
|
if human_delay > 0:
|
|
|
|
|
|
await asyncio.sleep(human_delay)
|
|
|
|
|
|
try:
|
|
|
|
|
|
ext = Path(file_path).suffix.lower()
|
|
|
|
|
|
if ext in _IMAGE_EXTS:
|
|
|
|
|
|
await self.send_image_file(
|
|
|
|
|
|
chat_id=event.source.chat_id,
|
|
|
|
|
|
image_path=file_path,
|
|
|
|
|
|
metadata=_thread_metadata,
|
|
|
|
|
|
)
|
|
|
|
|
|
elif ext in _VIDEO_EXTS:
|
|
|
|
|
|
await self.send_video(
|
|
|
|
|
|
chat_id=event.source.chat_id,
|
|
|
|
|
|
video_path=file_path,
|
|
|
|
|
|
metadata=_thread_metadata,
|
|
|
|
|
|
)
|
|
|
|
|
|
else:
|
|
|
|
|
|
await self.send_document(
|
|
|
|
|
|
chat_id=event.source.chat_id,
|
|
|
|
|
|
file_path=file_path,
|
|
|
|
|
|
metadata=_thread_metadata,
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception as file_err:
|
|
|
|
|
|
logger.error("[%s] Error sending local file %s: %s", self.name, file_path, file_err)
|
|
|
|
|
|
|
2026-03-29 21:55:23 -07:00
|
|
|
|
# Determine overall success for the processing hook
|
|
|
|
|
|
processing_ok = delivery_succeeded if delivery_attempted else not bool(response)
|
2026-04-08 16:07:07 -07:00
|
|
|
|
await self._run_processing_hook(
|
|
|
|
|
|
"on_processing_complete",
|
|
|
|
|
|
event,
|
|
|
|
|
|
ProcessingOutcome.SUCCESS if processing_ok else ProcessingOutcome.FAILURE,
|
|
|
|
|
|
)
|
2026-03-29 21:55:23 -07:00
|
|
|
|
|
2026-02-03 20:10:15 -08:00
|
|
|
|
# Check if there's a pending message that was queued during our processing
|
|
|
|
|
|
if session_key in self._pending_messages:
|
|
|
|
|
|
pending_event = self._pending_messages.pop(session_key)
|
2026-03-28 22:25:35 -07:00
|
|
|
|
logger.debug("[%s] Processing queued message from interrupt", self.name)
|
fix(gateway): close pending-drain and late-arrival races in base adapter (#12371)
Two related race conditions in gateway/platforms/base.py that could
produce duplicate agent runs or silently drop messages. Neither is
specific to any one platform — all adapters inherit this logic.
R5 (HIGH) — duplicate agent spawn on turn chain
In _process_message_background, the pending-drain path deleted
_active_sessions[session_key] before awaiting typing_task.cancel()
and then recursively awaiting _process_message_background for the
queued event. During the typing_task await, a fresh inbound message
M3 could pass the Level-1 guard (entry now missing), set its own
Event, and spawn a second _process_message_background for the same
session_key — two agents running simultaneously, duplicate responses,
duplicate tool calls.
Fix: keep the _active_sessions entry populated and only clear() the
Event. The guard stays live, so any concurrent inbound message takes
the busy-handler path (queue + interrupt) as intended.
R6 (MED-HIGH) — message dropped during finally cleanup
The finally block has two await points (typing_task, stop_typing)
before it deletes _active_sessions. A message arriving in that
window passes the guard (entry still live), lands in
_pending_messages via the busy-handler — and then the unconditional
del removes the guard with that message still queued. Nothing
drains it; the user never gets a reply.
Fix: before deleting _active_sessions in finally, pop any late
pending_messages entry and spawn a drain task for it. Only delete
_active_sessions when no pending is waiting.
Tests: tests/gateway/test_pending_drain_race.py — three regression
cases. Validated: without the fix, two of the three fail exactly
where the races manifest (duplicate-spawn guard loses identity,
late-arrival 'LATE' message not in processed list).
2026-04-18 19:32:26 -07:00
|
|
|
|
# Keep the _active_sessions entry live across the turn chain
|
|
|
|
|
|
# and only CLEAR the interrupt Event — do NOT delete the entry.
|
|
|
|
|
|
# If we deleted here, a concurrent inbound message arriving
|
|
|
|
|
|
# during the awaits below would pass the Level-1 guard, spawn
|
|
|
|
|
|
# its own _process_message_background, and run simultaneously
|
|
|
|
|
|
# with the recursive drain below. Two agents on one
|
|
|
|
|
|
# session_key = duplicate responses, duplicate tool calls.
|
|
|
|
|
|
# Clearing the Event keeps the guard live so follow-ups take
|
|
|
|
|
|
# the busy-handler path (queue + interrupt) as intended.
|
|
|
|
|
|
_active = self._active_sessions.get(session_key)
|
|
|
|
|
|
if _active is not None:
|
|
|
|
|
|
_active.clear()
|
2026-02-03 20:10:15 -08:00
|
|
|
|
typing_task.cancel()
|
|
|
|
|
|
try:
|
|
|
|
|
|
await typing_task
|
|
|
|
|
|
except asyncio.CancelledError:
|
|
|
|
|
|
pass
|
|
|
|
|
|
# Process pending message in new background task
|
|
|
|
|
|
await self._process_message_background(pending_event, session_key)
|
|
|
|
|
|
return # Already cleaned up
|
|
|
|
|
|
|
2026-03-29 21:55:23 -07:00
|
|
|
|
except asyncio.CancelledError:
|
2026-04-08 16:07:07 -07:00
|
|
|
|
current_task = asyncio.current_task()
|
|
|
|
|
|
outcome = ProcessingOutcome.CANCELLED
|
|
|
|
|
|
if current_task is None or current_task not in self._expected_cancelled_tasks:
|
|
|
|
|
|
outcome = ProcessingOutcome.FAILURE
|
|
|
|
|
|
await self._run_processing_hook("on_processing_complete", event, outcome)
|
2026-03-29 21:55:23 -07:00
|
|
|
|
raise
|
2026-02-02 19:01:51 -08:00
|
|
|
|
except Exception as e:
|
2026-04-08 16:07:07 -07:00
|
|
|
|
await self._run_processing_hook("on_processing_complete", event, ProcessingOutcome.FAILURE)
|
2026-03-28 22:25:35 -07:00
|
|
|
|
logger.error("[%s] Error handling message: %s", self.name, e, exc_info=True)
|
2026-03-18 10:42:43 -07:00
|
|
|
|
# Send the error to the user so they aren't left with radio silence
|
|
|
|
|
|
try:
|
|
|
|
|
|
error_type = type(e).__name__
|
|
|
|
|
|
error_detail = str(e)[:300] if str(e) else "no details available"
|
|
|
|
|
|
_thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None
|
|
|
|
|
|
await self.send(
|
|
|
|
|
|
chat_id=event.source.chat_id,
|
|
|
|
|
|
content=(
|
|
|
|
|
|
f"Sorry, I encountered an error ({error_type}).\n"
|
|
|
|
|
|
f"{error_detail}\n"
|
|
|
|
|
|
"Try again or use /reset to start a fresh session."
|
|
|
|
|
|
),
|
|
|
|
|
|
metadata=_thread_metadata,
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass # Last resort — don't let error reporting crash the handler
|
2026-02-03 14:51:31 -08:00
|
|
|
|
finally:
|
2026-04-15 16:40:38 -07:00
|
|
|
|
# Fire any one-shot post-delivery callback registered for this
|
|
|
|
|
|
# session (e.g. deferred background-review notifications).
|
2026-04-19 15:05:14 +05:30
|
|
|
|
_callback_generation = callback_generation
|
|
|
|
|
|
if hasattr(self, "pop_post_delivery_callback"):
|
|
|
|
|
|
_post_cb = self.pop_post_delivery_callback(
|
|
|
|
|
|
session_key,
|
|
|
|
|
|
generation=_callback_generation,
|
|
|
|
|
|
)
|
|
|
|
|
|
else:
|
|
|
|
|
|
_post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None)
|
2026-04-15 16:40:38 -07:00
|
|
|
|
if callable(_post_cb):
|
|
|
|
|
|
try:
|
|
|
|
|
|
_post_cb()
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
2026-02-03 14:51:31 -08:00
|
|
|
|
# Stop typing indicator
|
|
|
|
|
|
typing_task.cancel()
|
|
|
|
|
|
try:
|
|
|
|
|
|
await typing_task
|
|
|
|
|
|
except asyncio.CancelledError:
|
|
|
|
|
|
pass
|
2026-03-25 11:28:28 -07:00
|
|
|
|
# Also cancel any platform-level persistent typing tasks (e.g. Discord)
|
|
|
|
|
|
# that may have been recreated by _keep_typing after the last stop_typing()
|
|
|
|
|
|
try:
|
|
|
|
|
|
if hasattr(self, "stop_typing"):
|
|
|
|
|
|
await self.stop_typing(event.source.chat_id)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
fix(gateway): close pending-drain and late-arrival races in base adapter (#12371)
Two related race conditions in gateway/platforms/base.py that could
produce duplicate agent runs or silently drop messages. Neither is
specific to any one platform — all adapters inherit this logic.
R5 (HIGH) — duplicate agent spawn on turn chain
In _process_message_background, the pending-drain path deleted
_active_sessions[session_key] before awaiting typing_task.cancel()
and then recursively awaiting _process_message_background for the
queued event. During the typing_task await, a fresh inbound message
M3 could pass the Level-1 guard (entry now missing), set its own
Event, and spawn a second _process_message_background for the same
session_key — two agents running simultaneously, duplicate responses,
duplicate tool calls.
Fix: keep the _active_sessions entry populated and only clear() the
Event. The guard stays live, so any concurrent inbound message takes
the busy-handler path (queue + interrupt) as intended.
R6 (MED-HIGH) — message dropped during finally cleanup
The finally block has two await points (typing_task, stop_typing)
before it deletes _active_sessions. A message arriving in that
window passes the guard (entry still live), lands in
_pending_messages via the busy-handler — and then the unconditional
del removes the guard with that message still queued. Nothing
drains it; the user never gets a reply.
Fix: before deleting _active_sessions in finally, pop any late
pending_messages entry and spawn a drain task for it. Only delete
_active_sessions when no pending is waiting.
Tests: tests/gateway/test_pending_drain_race.py — three regression
cases. Validated: without the fix, two of the three fail exactly
where the races manifest (duplicate-spawn guard loses identity,
late-arrival 'LATE' message not in processed list).
2026-04-18 19:32:26 -07:00
|
|
|
|
# Late-arrival drain: a message may have arrived during the
|
|
|
|
|
|
# cleanup awaits above (typing_task cancel, stop_typing). Such
|
|
|
|
|
|
# messages passed the Level-1 guard (entry still live, Event
|
|
|
|
|
|
# possibly set) and landed in _pending_messages via the
|
|
|
|
|
|
# busy-handler path. Without this block, we would delete the
|
|
|
|
|
|
# active-session entry and the queued message would be silently
|
|
|
|
|
|
# dropped (user never gets a reply).
|
|
|
|
|
|
late_pending = self._pending_messages.pop(session_key, None)
|
|
|
|
|
|
if late_pending is not None:
|
|
|
|
|
|
logger.debug(
|
|
|
|
|
|
"[%s] Late-arrival pending message during cleanup — spawning drain task",
|
|
|
|
|
|
self.name,
|
|
|
|
|
|
)
|
|
|
|
|
|
_active = self._active_sessions.get(session_key)
|
|
|
|
|
|
if _active is not None:
|
|
|
|
|
|
_active.clear()
|
|
|
|
|
|
drain_task = asyncio.create_task(
|
|
|
|
|
|
self._process_message_background(late_pending, session_key)
|
|
|
|
|
|
)
|
2026-04-23 03:13:08 -07:00
|
|
|
|
# Hand ownership of the session to the drain task so stale-lock
|
|
|
|
|
|
# detection keeps working while it runs.
|
|
|
|
|
|
self._session_tasks[session_key] = drain_task
|
fix(gateway): close pending-drain and late-arrival races in base adapter (#12371)
Two related race conditions in gateway/platforms/base.py that could
produce duplicate agent runs or silently drop messages. Neither is
specific to any one platform — all adapters inherit this logic.
R5 (HIGH) — duplicate agent spawn on turn chain
In _process_message_background, the pending-drain path deleted
_active_sessions[session_key] before awaiting typing_task.cancel()
and then recursively awaiting _process_message_background for the
queued event. During the typing_task await, a fresh inbound message
M3 could pass the Level-1 guard (entry now missing), set its own
Event, and spawn a second _process_message_background for the same
session_key — two agents running simultaneously, duplicate responses,
duplicate tool calls.
Fix: keep the _active_sessions entry populated and only clear() the
Event. The guard stays live, so any concurrent inbound message takes
the busy-handler path (queue + interrupt) as intended.
R6 (MED-HIGH) — message dropped during finally cleanup
The finally block has two await points (typing_task, stop_typing)
before it deletes _active_sessions. A message arriving in that
window passes the guard (entry still live), lands in
_pending_messages via the busy-handler — and then the unconditional
del removes the guard with that message still queued. Nothing
drains it; the user never gets a reply.
Fix: before deleting _active_sessions in finally, pop any late
pending_messages entry and spawn a drain task for it. Only delete
_active_sessions when no pending is waiting.
Tests: tests/gateway/test_pending_drain_race.py — three regression
cases. Validated: without the fix, two of the three fail exactly
where the races manifest (duplicate-spawn guard loses identity,
late-arrival 'LATE' message not in processed list).
2026-04-18 19:32:26 -07:00
|
|
|
|
try:
|
|
|
|
|
|
self._background_tasks.add(drain_task)
|
|
|
|
|
|
drain_task.add_done_callback(self._background_tasks.discard)
|
|
|
|
|
|
except TypeError:
|
|
|
|
|
|
# Tests stub create_task() with non-hashable sentinels; tolerate.
|
|
|
|
|
|
pass
|
|
|
|
|
|
# Leave _active_sessions[session_key] populated — the drain
|
|
|
|
|
|
# task's own lifecycle will clean it up.
|
2026-04-19 15:05:14 +05:30
|
|
|
|
else:
|
2026-04-23 05:12:41 -07:00
|
|
|
|
# Clean up session tracking. Guard-match both deletes so a
|
|
|
|
|
|
# reset-like command that already swapped in its own
|
|
|
|
|
|
# command_guard (and cancelled us) can't be accidentally
|
|
|
|
|
|
# cleared by our unwind. The command owns the session now.
|
2026-04-23 03:13:08 -07:00
|
|
|
|
current_task = asyncio.current_task()
|
|
|
|
|
|
if current_task is not None and self._session_tasks.get(session_key) is current_task:
|
|
|
|
|
|
del self._session_tasks[session_key]
|
2026-04-23 05:12:41 -07:00
|
|
|
|
self._release_session_guard(session_key, guard=interrupt_event)
|
2026-02-03 20:10:15 -08:00
|
|
|
|
|
2026-03-15 04:21:50 -07:00
|
|
|
|
async def cancel_background_tasks(self) -> None:
    """Cancel every in-flight background message-processing task.

    Called during gateway shutdown/replacement so that sessions started by
    the old process stop running while the adapters are being torn down.
    """
    # A message arriving while we await the gather below can spawn a brand
    # new background task via handle_message, which registers itself in
    # self._background_tasks. Re-scan the set a bounded number of times so
    # those late arrivals get cancelled too instead of running untracked
    # against a disconnecting adapter.
    max_rounds = 5
    rounds_done = 0
    while rounds_done < max_rounds:
        rounds_done += 1
        live = [t for t in self._background_tasks if not t.done()]
        if not live:
            break
        for t in live:
            # Record the task as deliberately cancelled before cancelling,
            # so done-callbacks can tell shutdown apart from a crash.
            self._expected_cancelled_tasks.add(t)
            t.cancel()
        await asyncio.gather(*live, return_exceptions=True)
        # Loop again: tasks spawned during the gather are now visible.

    # Safety net for anything that appeared after the final round, plus a
    # full reset of per-session bookkeeping.
    self._background_tasks.clear()
    self._expected_cancelled_tasks.clear()
    self._session_tasks.clear()
    self._pending_messages.clear()
    self._active_sessions.clear()
2026-02-03 20:10:15 -08:00
|
|
|
|
def has_pending_interrupt(self, session_key: str) -> bool:
    """Return True when an interrupt has been signalled for *session_key*."""
    # No live session entry means nothing could have been signalled.
    if session_key not in self._active_sessions:
        return False
    return self._active_sessions[session_key].is_set()
|
def get_pending_message(self, session_key: str) -> Optional[MessageEvent]:
    """Return and clear the message queued for *session_key*, or None."""
    # EAFP: a missing key is the common, expected case on quiet sessions.
    try:
        return self._pending_messages.pop(session_key)
    except KeyError:
        return None
2026-02-02 19:01:51 -08:00
|
|
|
|
|
|
|
|
|
|
def build_source(
    self,
    chat_id: str,
    chat_name: Optional[str] = None,
    chat_type: str = "dm",
    user_id: Optional[str] = None,
    user_name: Optional[str] = None,
    thread_id: Optional[str] = None,
    chat_topic: Optional[str] = None,
    user_id_alt: Optional[str] = None,
    chat_id_alt: Optional[str] = None,
    is_bot: bool = False,
    guild_id: Optional[str] = None,
    parent_chat_id: Optional[str] = None,
    message_id: Optional[str] = None,
) -> SessionSource:
    """Helper to build a SessionSource for this platform."""

    def _opt_str(value: Optional[str]) -> Optional[str]:
        # Platform-native identifiers can be ints (e.g. Telegram chat IDs);
        # coerce truthy values to str while preserving None.
        return str(value) if value else None

    # An empty or whitespace-only topic carries no information — collapse
    # it to None so downstream code can simply test truthiness.
    topic = chat_topic.strip() if chat_topic and chat_topic.strip() else None

    return SessionSource(
        platform=self.platform,
        chat_id=str(chat_id),
        chat_name=chat_name,
        chat_type=chat_type,
        user_id=_opt_str(user_id),
        user_name=user_name,
        thread_id=_opt_str(thread_id),
        chat_topic=topic,
        user_id_alt=user_id_alt,
        chat_id_alt=chat_id_alt,
        is_bot=is_bot,
        guild_id=_opt_str(guild_id),
        parent_chat_id=_opt_str(parent_chat_id),
        message_id=_opt_str(message_id),
    )
|
|
|
|
|
|
|
|
|
|
|
|
@abstractmethod
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
    """
    Get information about a chat/channel.

    Args:
        chat_id: Platform-native chat/channel identifier.

    Returns dict with at least:
    - name: Chat name
    - type: "dm", "group", "channel"
    """
    pass
|
|
|
|
|
|
|
|
|
|
|
def format_message(self, content: str) -> str:
    """Format a message for this platform.

    The base implementation is the identity transform. Platform adapters
    override this to apply their own markup rules (e.g. Telegram
    MarkdownV2, Discord markdown).
    """
    return content
|
|
|
|
|
|
fix(tools): chunk long messages in send_message_tool before dispatch (#1552)
* fix: prevent infinite 400 failure loop on context overflow (#1630)
When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message. This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error. Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.
Three-layer fix:
1. run_agent.py — Fallback heuristic: when a 400 error has a very short
generic message AND the session is large (>40% of context or >80
messages), treat it as a probable context overflow and trigger
compression instead of aborting.
2. run_agent.py + gateway/run.py — Don't persist failed messages:
when the agent returns failed=True before generating any response,
skip writing the user's message to the transcript/DB. This prevents
the session from growing on each failure.
3. gateway/run.py — Smarter error messages: detect context-overflow
failures and suggest /compact or /reset specifically, instead of a
generic 'try again' that will fail identically.
* fix(skills): detect prompt injection patterns and block cache file reads
Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):
1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
(index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
was the original injection vector — untrusted skill descriptions
in the catalog contained adversarial text that the model executed.
2. skill_view: warns when skills are loaded from outside the trusted
~/.hermes/skills/ directory, and detects common injection patterns
in skill content ("ignore previous instructions", "<system>", etc.).
Cherry-picked from PR #1562 by ygd58.
* fix(tools): chunk long messages in send_message_tool before dispatch (#1552)
Long messages sent via send_message tool or cron delivery silently
failed when exceeding platform limits. Gateway adapters handle this
via truncate_message(), but the standalone senders in send_message_tool
bypassed that entirely.
- Apply truncate_message() chunking in _send_to_platform() before
dispatching to individual platform senders
- Remove naive message[i:i+2000] character split in _send_discord()
in favor of centralized smart splitting
- Attach media files to last chunk only for Telegram
- Add regression tests for chunking and media placement
Cherry-picked from PR #1557 by llbn.
---------
Co-authored-by: buray <ygd58@users.noreply.github.com>
Co-authored-by: lbn <llbn@users.noreply.github.com>
2026-03-17 01:52:43 -07:00
|
|
|
|
@staticmethod
def truncate_message(
    content: str,
    max_length: int = 4096,
    len_fn: Optional["Callable[[str], int]"] = None,
) -> List[str]:
    """
    Split a long message into chunks, preserving code block boundaries.

    When a split falls inside a triple-backtick code block, the fence is
    closed at the end of the current chunk and reopened (with the original
    language tag) at the start of the next chunk. Multi-chunk responses
    receive indicators like ``(1/3)``.

    Args:
        content: The full message content
        max_length: Maximum length per chunk (platform-specific)
        len_fn: Optional length function for measuring string length.
                Defaults to ``len`` (Unicode code-points). Pass
                ``utf16_len`` for platforms that measure message
                length in UTF-16 code units (e.g. Telegram).

    Returns:
        List of message chunks
    """
    _len = len_fn or len
    # Fast path: the whole message fits in one chunk — no indicator needed.
    if _len(content) <= max_length:
        return [content]

    INDICATOR_RESERVE = 10  # room for " (XX/XX)"
    FENCE_CLOSE = "\n```"

    chunks: List[str] = []
    remaining = content
    # When the previous chunk ended mid-code-block, this holds the
    # language tag (possibly "") so we can reopen the fence.
    carry_lang: Optional[str] = None

    while remaining:
        # If we're continuing a code block from the previous chunk,
        # prepend a new opening fence with the same language tag.
        prefix = f"```{carry_lang}\n" if carry_lang is not None else ""

        # How much body text we can fit after accounting for the prefix,
        # a potential closing fence, and the chunk indicator.
        headroom = max_length - INDICATOR_RESERVE - _len(prefix) - _len(FENCE_CLOSE)
        if headroom < 1:
            # Degenerate max_length; fall back to half so we still progress.
            headroom = max_length // 2

        # Everything remaining fits in one final chunk
        if _len(prefix) + _len(remaining) <= max_length - INDICATOR_RESERVE:
            chunks.append(prefix + remaining)
            break

        # Find a natural split point (prefer newlines, then spaces).
        # When _len != len (e.g. utf16_len for Telegram), headroom is
        # measured in the custom unit. We need codepoint-based slice
        # positions that stay within the custom-unit budget.
        #
        # _safe_slice_pos() maps a custom-unit budget to the largest
        # codepoint offset whose custom length ≤ budget.
        if _len is not len:
            # Map headroom (custom units) → codepoint slice length
            _cp_limit = _custom_unit_to_cp(remaining, headroom, _len)
        else:
            _cp_limit = headroom
        region = remaining[:_cp_limit]
        split_at = region.rfind("\n")
        # A newline in the back half is a good break; otherwise try spaces,
        # and as a last resort hard-cut at the budget limit.
        if split_at < _cp_limit // 2:
            split_at = region.rfind(" ")
            if split_at < 1:
                split_at = _cp_limit

        # Avoid splitting inside an inline code span (`...`).
        # If the text before split_at has an odd number of unescaped
        # backticks, the split falls inside inline code — the resulting
        # chunk would have an unpaired backtick and any special characters
        # (like parentheses) inside the broken span would be unescaped,
        # causing MarkdownV2 parse errors on Telegram.
        candidate = remaining[:split_at]
        backtick_count = candidate.count("`") - candidate.count("\\`")
        if backtick_count % 2 == 1:
            # Find the last unescaped backtick and split before it
            last_bt = candidate.rfind("`")
            while last_bt > 0 and candidate[last_bt - 1] == "\\":
                last_bt = candidate.rfind("`", 0, last_bt)
            if last_bt > 0:
                # Try to find a space or newline just before the backtick
                safe_split = candidate.rfind(" ", 0, last_bt)
                nl_split = candidate.rfind("\n", 0, last_bt)
                safe_split = max(safe_split, nl_split)
                # Only accept if it doesn't shrink the chunk too much.
                if safe_split > _cp_limit // 4:
                    split_at = safe_split

        chunk_body = remaining[:split_at]
        remaining = remaining[split_at:].lstrip()

        full_chunk = prefix + chunk_body

        # Walk only the chunk_body (not the prefix we prepended) to
        # determine whether we end inside an open code block.
        in_code = carry_lang is not None
        lang = carry_lang or ""
        for line in chunk_body.split("\n"):
            stripped = line.strip()
            if stripped.startswith("```"):
                if in_code:
                    in_code = False
                    lang = ""
                else:
                    in_code = True
                    tag = stripped[3:].strip()
                    lang = tag.split()[0] if tag else ""

        if in_code:
            # Close the orphaned fence so the chunk is valid on its own
            full_chunk += FENCE_CLOSE
            carry_lang = lang
        else:
            carry_lang = None

        chunks.append(full_chunk)

    # Append chunk indicators when the response spans multiple messages
    if len(chunks) > 1:
        total = len(chunks)
        chunks = [
            f"{chunk} ({i + 1}/{total})" for i, chunk in enumerate(chunks)
        ]

    return chunks