# cli-config.yaml.example

# Hermes Agent CLI Configuration
# Copy this file to cli-config.yaml and customize as needed.
# This file configures the CLI behavior. Environment variables in .env take precedence.

# =============================================================================
# Model Configuration
# =============================================================================
model:
  # Model used when no --model flag is given.
  # This key may be spelled either "default" or "model".
  default: "anthropic/claude-opus-4.6"

  # Inference provider selection:
  #   "auto"         - Auto-detect from credentials (default)
  #   "openrouter"   - OpenRouter (requires: OPENROUTER_API_KEY or OPENAI_API_KEY)
  #   "nous"         - Nous Portal OAuth (requires: hermes login)
  #   "nous-api"     - Nous Portal API key (requires: NOUS_API_KEY)
  #   "anthropic"    - Direct Anthropic API (requires: ANTHROPIC_API_KEY)
  #   "openai-codex" - OpenAI Codex (requires: hermes auth)
  #   "copilot"      - GitHub Copilot / GitHub Models (requires: GITHUB_TOKEN)
  #   "gemini"       - Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
  #   "zai"          - z.ai / ZhipuAI GLM models (requires: GLM_API_KEY)
  #   "kimi-coding"  - Kimi / Moonshot AI (requires: KIMI_API_KEY)
  #   "minimax"      - MiniMax global (requires: MINIMAX_API_KEY)
  #   "minimax-cn"   - MiniMax China (requires: MINIMAX_CN_API_KEY)
  #   "huggingface"  - Hugging Face Inference (requires: HF_TOKEN)
  #   "nvidia"       - NVIDIA NIM / build.nvidia.com (requires: NVIDIA_API_KEY)
  #   "xiaomi"       - Xiaomi MiMo (requires: XIAOMI_API_KEY)
  #   "arcee"        - Arcee AI Trinity models (requires: ARCEEAI_API_KEY)
  #   "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
  #   "kilocode"     - KiloCode gateway (requires: KILOCODE_API_KEY)
  #   "ai-gateway"   - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
  #
  # Local servers (LM Studio, Ollama, vLLM, llama.cpp):
  #   "custom"       - Any OpenAI-compatible endpoint. Set base_url below.
  #   Aliases: "lmstudio", "ollama", "vllm", "llamacpp" all map to "custom".
  #   Example for LM Studio:
  #     provider: "lmstudio"
  #     base_url: "http://localhost:1234/v1"
  #   No API key needed — local servers typically ignore auth.
  #
  # The --provider flag and the HERMES_INFERENCE_PROVIDER env var can also
  # override this setting.
  provider: "auto"

  # API configuration (falls back to OPENROUTER_API_KEY env var)
  # api_key: "your-key-here"  # Uncomment to set here instead of .env
  base_url: "https://openrouter.ai/api/v1"

  # ── Token limits — two settings, easy to confuse ──────────────────────────
  #
  # context_length: TOTAL context window (input + output tokens combined).
  #   Controls when Hermes compresses history and validates requests.
  #   Leave unset — Hermes auto-detects the correct value from the provider.
  #   Set manually only when auto-detection is wrong (e.g. a local server with
  #   a custom num_ctx, or a proxy that doesn't expose /v1/models).
  #
  # context_length: 131072
  #
  # max_tokens: OUTPUT cap — maximum tokens the model may generate per response.
  #   Unrelated to how long your conversation history can be.
  #   The OpenAI-standard name "max_tokens" is a misnomer; newer APIs (e.g.
  #   OpenAI's Responses API) call it "max_output_tokens" for clarity.
  #   Leave unset to use the model's native output ceiling (recommended).
  #   Set only if you want to deliberately limit individual response length.
  #
  # max_tokens: 8192

# Named provider overrides (optional)
# Use this for per-provider request timeouts, non-stream stale timeouts,
# and per-model exceptions.
# Applies to the primary turn client on every api_mode (OpenAI-wire, native
# Anthropic, and Anthropic-compatible providers), the fallback chain, and
# client rebuilds during credential rotation.  For OpenAI-wire chat
# completions (streaming and non-streaming) the configured value is also
# used as the per-request ``timeout=`` kwarg so it wins over the legacy
# HERMES_API_TIMEOUT env var (which still applies when no config is set).
# ``stale_timeout_seconds`` controls the non-streaming stale-call detector and
# wins over the legacy HERMES_API_CALL_STALE_TIMEOUT env var. Leaving these
# unset keeps the legacy defaults (HERMES_API_TIMEOUT=1800s,
# HERMES_API_CALL_STALE_TIMEOUT=300s, native Anthropic 900s).
#
# Not currently wired for AWS Bedrock (bedrock_converse + AnthropicBedrock
# SDK paths) — those use boto3 with its own timeout configuration.
#
# providers:
#   ollama-local:
#     request_timeout_seconds: 300   # Longer timeout for local cold-starts
#     stale_timeout_seconds: 900     # Explicitly re-enable stale detection on local endpoints
#   anthropic:
#     request_timeout_seconds: 30    # Fast-fail cloud requests
#     models:
#       claude-opus-4.6:
#         timeout_seconds: 600       # Longer timeout for extended-thinking Opus calls
#   openai-codex:
#     models:
#       gpt-5.4:
#         stale_timeout_seconds: 1800  # Longer non-stream stale timeout for slow large-context turns

# =============================================================================
# OpenRouter Provider Routing (only applies when using OpenRouter)
# =============================================================================
# Control how requests are routed across providers on OpenRouter.
# See: https://openrouter.ai/docs/guides/routing/provider-selection
#
# provider_routing:
#   # Sort strategy: "price" (default), "throughput", or "latency"
#   # Append :nitro to model name for a shortcut to throughput sorting.
#   sort: "throughput"
#
#   # Only allow these providers (provider slugs from OpenRouter)
#   # only: ["anthropic", "google"]
#
#   # Skip these providers entirely
#   # ignore: ["deepinfra", "fireworks"]
#
#   # Try providers in this order (overrides default load balancing)
#   # order: ["anthropic", "google", "together"]
#
#   # Require providers to support all parameters in your request
#   # require_parameters: true
#
#   # Data policy: "allow" (default) or "deny" to exclude providers that may store data
#   # data_collection: "deny"

# =============================================================================
# Git Worktree Isolation
# =============================================================================
# When enabled, each CLI session creates an isolated git worktree so multiple
# agents can work on the same repo concurrently without file collisions.
# Equivalent to always passing --worktree / -w on the command line.
#
# worktree: true    # Always create a worktree when in a git repo
# worktree: false   # Default — only create when -w flag is passed

# =============================================================================
# Terminal Tool Configuration
# =============================================================================
# Choose ONE of the following terminal configurations by uncommenting it.
# The terminal tool executes commands in the specified environment.

# -----------------------------------------------------------------------------
# OPTION 1: Local execution (default)
# Commands run directly on your machine in the current directory
# -----------------------------------------------------------------------------
# Working directory behavior:
#   - CLI (`hermes` command): Uses "." (current directory where you run hermes)
#   - Messaging (Telegram/Discord): Uses MESSAGING_CWD from .env (default: home)
terminal:
  # NOTE: the indented container_* keys under "Container resource limits"
  # (after OPTION 6 further down) are also part of this mapping.
  backend: "local"
  cwd: "."  # For local backend: "." = current directory. Ignored for remote backends unless a backend documents otherwise.
  timeout: 180
  docker_mount_cwd_to_workspace: false  # SECURITY: off by default. Opt in to mount the launch cwd into Docker /workspace.
  lifetime_seconds: 300
  # sudo_password: "hunter2"  # Optional: pipe a sudo password via sudo -S. SECURITY WARNING: plaintext.
  # sudo_password: ""         # Explicit empty password: try empty and never open the interactive sudo prompt.

# -----------------------------------------------------------------------------
# OPTION 2: SSH remote execution
# Commands run on a remote server - agent code stays local (sandboxed)
# Great for: keeping agent isolated from its own code, using powerful remote hardware
# -----------------------------------------------------------------------------
# terminal:
#   backend: "ssh"
#   cwd: "/home/myuser/project"  # Path on the REMOTE server
#   timeout: 180
#   lifetime_seconds: 300
#   ssh_host: "my-server.example.com"
#   ssh_user: "myuser"
#   ssh_port: 22
#   ssh_key: "~/.ssh/id_rsa"  # Optional - uses ssh-agent if not specified

# -----------------------------------------------------------------------------
# OPTION 3: Docker container
# Commands run in an isolated Docker container
# Great for: reproducible environments, testing, isolation
# -----------------------------------------------------------------------------
# terminal:
#   backend: "docker"
#   cwd: "/workspace"  # Path INSIDE the container (default: /)
#   timeout: 180
#   lifetime_seconds: 300
#   docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
#   docker_mount_cwd_to_workspace: true   # Explicit opt-in: mount your launch cwd into /workspace
#   # Optional: explicitly forward selected env vars into Docker.
#   # These values come from your current shell first, then ~/.hermes/.env.
#   # Warning: anything forwarded here is visible to commands run in the container.
#   docker_forward_env:
#     - "GITHUB_TOKEN"
#     - "NPM_TOKEN"

# -----------------------------------------------------------------------------
# OPTION 4: Singularity/Apptainer container
# Commands run in a Singularity container (common in HPC environments)
# Great for: HPC clusters, shared compute environments
# -----------------------------------------------------------------------------
# terminal:
#   backend: "singularity"
#   cwd: "/workspace"  # Path INSIDE the container (default: /root)
#   timeout: 180
#   lifetime_seconds: 300
#   singularity_image: "docker://nikolaik/python-nodejs:python3.11-nodejs20"

# -----------------------------------------------------------------------------
# OPTION 5: Modal cloud execution
# Commands run on Modal's cloud infrastructure
# Great for: GPU access, scalable compute, serverless execution
# -----------------------------------------------------------------------------
# terminal:
#   backend: "modal"
#   cwd: "/workspace"  # Path INSIDE the sandbox (default: /root)
#   timeout: 180
#   lifetime_seconds: 300
#   modal_image: "nikolaik/python-nodejs:python3.11-nodejs20"

# -----------------------------------------------------------------------------
# OPTION 6: Daytona cloud execution
# Commands run in Daytona cloud sandboxes
# Great for: Cloud dev environments, persistent workspaces, team collaboration
# Requires: pip install daytona, DAYTONA_API_KEY env var
# -----------------------------------------------------------------------------
# terminal:
#   backend: "daytona"
#   cwd: "~"
#   timeout: 180
#   lifetime_seconds: 300
#   daytona_image: "nikolaik/python-nodejs:python3.11-nodejs20"
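#   container_disk: 10240          # Daytona max is 10GB per sandbox
#   # NOTE: if you set container_disk here, remove the shared container_disk
#   # entry under "Container resource limits" below — duplicate keys in one
#   # mapping are invalid YAML and most parsers silently keep the last value.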

#
# --- Container resource limits (docker, singularity, modal, daytona -- ignored for local/ssh) ---
# These settings apply to all container backends. They control the resources
# allocated to the sandbox and whether its filesystem persists across sessions.
  # NOTE: keep the two-space indent — these keys are children of whichever
  # `terminal:` mapping is active above, not top-level settings.
  container_cpu: 1              # CPU cores
  container_memory: 5120        # Memory in MB (5120 = 5GB)
  container_disk: 51200         # Disk in MB (51200 = 50GB)
  container_persistent: true    # Persist filesystem across sessions (false = ephemeral)

# -----------------------------------------------------------------------------
# SUDO SUPPORT (works with ALL backends above)
# -----------------------------------------------------------------------------
# Add sudo_password to any terminal config above to enable sudo commands.
# The password is piped via `sudo -S`. Works with local, ssh, docker, etc.
#
# SECURITY WARNING: Password stored in plaintext!
#
# INTERACTIVE PROMPT: If sudo_password is unset and the CLI is running,
# you'll be prompted to enter your password when sudo is needed:
# - 45-second timeout (auto-skips if no input)
# - Press Enter to skip (command fails gracefully)
# - Password is hidden while typing
# - Password is cached for the session
#
# EMPTY PASSWORDS: Setting sudo_password to an explicit empty string is different
# from leaving it unset. Hermes will try an empty password via `sudo -S` and
# will not open the interactive prompt. This is useful for passwordless sudo,
# Touch ID sudo setups, and environments where prompting is just noise.
#
# ALTERNATIVES:
# - SSH backend: Configure passwordless sudo on the remote server
# - Containers: Run as root inside the container (no sudo needed)
# - Local: Configure /etc/sudoers for specific commands
#
# Example (add to your terminal section):
#   sudo_password: "your-password-here"

# =============================================================================
# Security Scanning (tirith)
# =============================================================================
# Optional pre-exec command security scanning via tirith.
# Detects homograph URLs, pipe-to-shell, terminal injection, env manipulation.
# Install: brew install sheeki03/tap/tirith
# Docs: https://github.com/sheeki03/tirith
#
# security:
#   tirith_enabled: true        # Enable/disable tirith scanning
#   tirith_path: "tirith"       # Path to tirith binary (supports ~ expansion)
#   tirith_timeout: 5           # Scan timeout in seconds
#   tirith_fail_open: true      # Allow commands if tirith unavailable

# =============================================================================
# Browser Tool Configuration
# =============================================================================
browser:
  # Inactivity timeout in seconds. Browser sessions are closed automatically
  # after this long with no activity between agent loops (default: 120 = 2 minutes).
  inactivity_timeout: 120

# =============================================================================
# Context Compression (Auto-shrinks long conversations)
# =============================================================================
# When conversation approaches model's context limit, middle turns are
# automatically summarized to free up space while preserving important context.
#
# HOW IT WORKS:
# 1. Tracks actual token usage from API responses (not estimates)
# 2. When prompt_tokens >= threshold% of model's context_length, triggers compression
# 3. Protects first 3 turns (system prompt, initial request, first response)
# 4. Protects last N turns (default 20 messages = ~10 full turns of recent context)
# 5. Summarizes middle turns using a fast/cheap model
# 6. Inserts summary as a user message, continues conversation seamlessly
#
# Post-compression tail budget is target_ratio × threshold × context_length:
#   200K context, threshold 0.50, ratio 0.20 → 20K tokens of recent tail preserved
#   1M   context, threshold 0.50, ratio 0.20 → 100K tokens of recent tail preserved
#
compression:
  # Master switch for automatic context compression (default: true).
  # Set to false to manage context manually or to get errors on overflow.
  enabled: true

  # Fraction of the model's context limit at which compression triggers
  # (default: 0.50 = 50%).
  # Lower values = more aggressive compression, higher values = compress later.
  threshold: 0.50

  # Share of the threshold kept as the recent tail (default: 0.20 = 20%),
  # e.g. 20% of a 50% threshold = 10% of total context kept as recent messages.
  # Summary output is separately capped at 12K tokens (Gemini output limit).
  # Range: 0.10 - 0.80
  target_ratio: 0.20

  # How many of the most recent messages are always preserved
  # (default: 20 ≈ 10 full turns). Higher values keep more recent conversation
  # intact at the cost of more aggressive compression of older turns.
  protect_last_n: 20

  # To pin a specific model/provider for compression summaries, use the
  # auxiliary section below (auxiliary.compression.provider / model).

# =============================================================================
# Anthropic prompt caching TTL
# =============================================================================
# When prompt caching is active (Claude via OpenRouter or native Anthropic),
# Anthropic supports two TTL tiers for cached prefixes: "5m" (default) and
# "1h". Other values are ignored and "5m" is used.
#
prompt_caching:
  cache_ttl: "5m"  # "5m" (default) or "1h"; use "1h" for long sessions with pauses between turns

# =============================================================================
# Auxiliary Models (Advanced — Experimental)
# =============================================================================
# Hermes uses lightweight "auxiliary" models for side tasks: image analysis,
# browser screenshot analysis, web page summarization, and context compression.
#
# By default these use Gemini Flash via OpenRouter or Nous Portal and are
# auto-detected from your credentials.  You do NOT need to change anything
# here for normal usage.
#
# WARNING: Overriding these with providers other than OpenRouter or Nous Portal
# is EXPERIMENTAL and may not work.  Not all models/providers support vision,
# produce usable summaries, or accept the same API format.  Change at your own
# risk — if things break, reset to "auto" / empty values.
#
# Each task has its own provider + model pair so you can mix providers.
# For example: OpenRouter for vision (needs multimodal), but your main
# local endpoint for compression (just needs text).
#
# Provider options:
#   "auto"         - Best available: OpenRouter → Nous Portal → main endpoint (default)
#   "openrouter"   - Force OpenRouter (requires: OPENROUTER_API_KEY)
#   "nous"         - Force Nous Portal (requires: hermes login)
#   "gemini"       - Force Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
#   "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY)
#   "codex"        - Force Codex OAuth (requires: hermes model → Codex).
#                    Uses gpt-5.3-codex which supports vision.
#   "main"         - Use your custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY).
#                    Works with OpenAI API, local models, or any OpenAI-compatible
#                    endpoint.  Also falls back to Codex OAuth and API-key providers.
#
# Model: leave empty to use the provider's default.  When empty, OpenRouter
# uses "google/gemini-3-flash-preview" and Nous uses "gemini-3-flash".
# Other providers pick a sensible default automatically.
#
# auxiliary:
#   # Image analysis: vision_analyze tool + browser screenshots
#   vision:
#     provider: "auto"
#     model: ""              # e.g. "google/gemini-2.5-flash", "openai/gpt-4o"
#     timeout: 30            # LLM API call timeout (seconds)
#     download_timeout: 30   # Image HTTP download timeout (seconds)
#                            # Increase for slow connections or self-hosted image servers
#
#   # Web page scraping / summarization + browser page text extraction
#   web_extract:
#     provider: "auto"
#     model: ""
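#
#   # Context compression summaries (see the compression section above)
#   compression:
#     provider: "auto"
#     model: ""
#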
#   # Session search — summarizes matching past sessions
#   session_search:
#     provider: "auto"
#     model: ""
#     timeout: 30
#     max_concurrency: 3    # Limit parallel summaries to reduce request-burst 429s
#     extra_body: {}        # Provider-specific OpenAI-compatible request fields
#                           # Example for providers that support request-body
#                           # reasoning controls:
#                           # extra_body:
#                           #   enable_thinking: false

# =============================================================================
# Persistent Memory
# =============================================================================
# Bounded curated memory injected into the system prompt every session.
# Two stores: MEMORY.md (agent's notes) and USER.md (user profile).
# Character limits keep the memory small and focused. The agent manages
# pruning -- when at the limit, it must consolidate or replace entries.
# Disabled by default in batch_runner and RL environments.
#
memory:
  # MEMORY.md — the agent's own notes: environment facts, conventions, things learned
  memory_enabled: true

  # USER.md — the user profile: preferences, communication style, expectations
  user_profile_enabled: true

  # Size caps in characters (~2.75 chars per token, model-independent)
  memory_char_limit: 2200   # ~800 tokens
  user_char_limit: 1375     # ~500 tokens

  # Periodic memory nudge: every N user turns, remind the agent to consider
  # saving memories. Set to 0 to disable. Only active when memory is enabled.
  nudge_interval: 10        # Nudge every 10 user turns (0 = disabled)

  # Memory flush: before context is lost (compression, /new, /reset, exit) the
  # agent gets one turn to save memories. Set to 0 to disable.
  # For exit/reset, only fires if the session had at least this many user turns.
  flush_min_turns: 6        # Min user turns to trigger flush on exit/reset (0 = disabled)

# =============================================================================
# Session Reset Policy (Messaging Platforms)
# =============================================================================
# Controls when messaging sessions (Telegram, Discord, WhatsApp, Slack) are
# automatically cleared. Without resets, conversation context grows indefinitely
# which increases API costs with every message.
#
# When a reset triggers, the agent first saves important information to its
# persistent memory — but the conversation context is wiped. The agent starts
# fresh but retains learned facts via its memory system.
#
# Users can always manually reset with /reset or /new in chat.
#
# Modes:
#   "both"  - Reset on EITHER inactivity timeout or daily boundary (recommended)
#   "idle"  - Reset only after N minutes of inactivity
#   "daily" - Reset only at a fixed hour each day
#   "none"  - Never auto-reset; context lives until /reset or compression kicks in
#
# When a reset triggers, the agent gets one turn to save important memories and
# skills before the context is wiped. Persistent memory carries across sessions.
#
session_reset:
  # Reset policy: one of "both", "idle", "daily", or "none" (see Modes above).
  mode: "both"
  # Inactivity timeout in minutes (default: 1440 = 24 hours)
  idle_minutes: 1440
  # Daily reset hour, 0-23 local time (default: 4 AM)
  at_hour: 4

# Session isolation in group/channel chats.
# When true, group/channel chats use one session per participant when the platform
# provides a user ID. This is the secure default and prevents users in the same
# room from sharing context, interrupts, and token costs. Set false only if you
# explicitly want one shared "room brain" per group/channel.
group_sessions_per_user: true

# ─────────────────────────────────────────────────────────────────────────────
# Gateway Streaming
# ─────────────────────────────────────────────────────────────────────────────
# Stream tokens to messaging platforms in real-time. The bot sends a message
# on first token, then progressively edits it as more tokens arrive.
# Disabled by default — enable to try the streaming UX on Telegram/Discord/Slack.
streaming:
  enabled: false              # Set true to stream tokens to messaging platforms
  # Optional tuning knobs (uncomment to set):
  # transport: edit           # "edit" = progressive editMessageText
  # edit_interval: 0.3        # seconds between message edits
  # buffer_threshold: 40      # chars before forcing an edit flush
  # cursor: " ▉"              # cursor shown during streaming

# =============================================================================
# Skills Configuration
# =============================================================================
# Skills are reusable procedures the agent can load and follow. The agent can
# also create new skills after completing complex tasks.
#
skills:
  # Nudge the agent to create skills after complex tasks.
  # Every N tool-calling iterations, remind the model to consider saving a skill.
  # Set to 0 to disable.
  creation_nudge_interval: 15  # 0 = disabled

  # External skill directories — share skills across tools/agents without
  # copying them into ~/.hermes/skills/.  Each path is expanded (~ and ${VAR})
  # and resolved to an absolute path.  External dirs are read-only: skill
  # creation always writes to ~/.hermes/skills/.  Local skills take precedence
  # when names collide.
  # Uncomment and list one directory per item:
  # external_dirs:
  #   - ~/.agents/skills
  #   - /home/shared/team-skills

# =============================================================================
# Agent Behavior
# =============================================================================
agent:
  # Cap on tool-calling iterations per conversation.
  # Higher = more room for complex tasks, but costs more tokens.
  # Recommended: 20-30 for focused tasks, 50-100 for open exploration.
  max_turns: 60

  # Inactivity timeout for gateway agent runs (seconds, 0 = unlimited).
  # An agent that is actively calling tools or receiving API responses can run
  # indefinitely; this only fires after it has been idle for this duration.
  # gateway_timeout: 1800

  # Staged warning sent before escalating to the full timeout.
  # Fires once per run when inactivity reaches this threshold (seconds).
  # Set to 0 to disable the warning.
  # gateway_timeout_warning: 900

  # Graceful drain timeout for gateway stop/restart (seconds).
  # The gateway stops accepting new work, waits for in-flight agents to
  # finish, then interrupts anything still running after this timeout.
  # 0 = no drain, interrupt immediately.
  # restart_drain_timeout: 60

  # Max app-level retry attempts for API errors (connection drops, provider
  # timeouts, 5xx, etc.) before the agent surfaces the failure (default 3).
  # Lower this to 1 if you use fallback providers and want fast failover on
  # flaky primaries. This is the Hermes-level loop — the OpenAI SDK does its
  # own low-level retries underneath this wrapper.
  # api_max_retries: 3

  # Enable verbose logging
  verbose: false

  # Reasoning effort level (OpenRouter and Nous Portal).
  # Controls how much "thinking" the model does before responding.
  # Options: "xhigh" (max), "high", "medium", "low", "minimal", "none" (disable)
  reasoning_effort: "medium"

  # Predefined personalities (use with /personality command).
  # Each entry maps a personality name to its persona text.
  personalities:
    helpful: "You are a helpful, friendly AI assistant."
    concise: "You are a concise assistant. Keep responses brief and to the point."
    technical: "You are a technical expert. Provide detailed, accurate technical information."
    creative: "You are a creative assistant. Think outside the box and offer innovative solutions."
    teacher: "You are a patient teacher. Explain concepts clearly with examples."
    kawaii: "You are a kawaii assistant! Use cute expressions like (◕‿◕), ★, ♪, and ~! Add sparkles and be super enthusiastic about everything! Every response should feel warm and adorable desu~! ヽ(>∀<☆)ノ"
    catgirl: "You are Neko-chan, an anime catgirl AI assistant, nya~! Add 'nya' and cat-like expressions to your speech. Use kaomoji like (=^･ω･^=) and ฅ^•ﻌ•^ฅ. Be playful and curious like a cat, nya~!"
    pirate: "Arrr! Ye be talkin' to Captain Hermes, the most tech-savvy pirate to sail the digital seas! Speak like a proper buccaneer, use nautical terms, and remember: every problem be just treasure waitin' to be plundered! Yo ho ho!"
    shakespeare: "Hark! Thou speakest with an assistant most versed in the bardic arts. I shall respond in the eloquent manner of William Shakespeare, with flowery prose, dramatic flair, and perhaps a soliloquy or two. What light through yonder terminal breaks?"
    surfer: "Duuude! You're chatting with the chillest AI on the web, bro! Everything's gonna be totally rad. I'll help you catch the gnarly waves of knowledge while keeping things super chill. Cowabunga! 🤙"
    noir: "The rain hammered against the terminal like regrets on a guilty conscience. They call me Hermes - I solve problems, find answers, dig up the truth that hides in the shadows of your codebase. In this city of silicon and secrets, everyone's got something to hide. What's your story, pal?"
    uwu: "hewwo! i'm your fwiendwy assistant uwu~ i wiww twy my best to hewp you! *nuzzles your code* OwO what's this? wet me take a wook! i pwomise to be vewy hewpful >w<"
    philosopher: "Greetings, seeker of wisdom. I am an assistant who contemplates the deeper meaning behind every query. Let us examine not just the 'how' but the 'why' of your questions. Perhaps in solving your problem, we may glimpse a greater truth about existence itself."
    hype: "YOOO LET'S GOOOO!!! 🔥🔥🔥 I am SO PUMPED to help you today! Every question is AMAZING and we're gonna CRUSH IT together! This is gonna be LEGENDARY! ARE YOU READY?! LET'S DO THIS! 💪😤🚀"

# =============================================================================
# Toolsets
# =============================================================================
# Control which tools the agent has access to.
# Use `hermes tools` to interactively enable/disable tools per platform.

# =============================================================================
# Platform Toolsets (per-platform tool configuration)
# =============================================================================
# Override which toolsets are available on each platform.
# If a platform isn't listed here, its built-in default is used.
#
# You can use EITHER:
#   - A preset like "hermes-cli" or "hermes-telegram" (curated tool set)
#   - A list of individual toolsets to compose your own (see list below)
#
# Supported platform keys: cli, telegram, discord, whatsapp, slack, signal,
#   homeassistant, qqbot
#
# Examples:
#
#   # Use presets (same as defaults):
#   platform_toolsets:
#     cli: [hermes-cli]
#     telegram: [hermes-telegram]
#
#   # Custom: give Telegram only web + terminal + file + planning:
#   platform_toolsets:
#     telegram: [web, terminal, file, todo]
#
#   # Custom: CLI without browser or image gen:
#   platform_toolsets:
#     cli: [web, terminal, file, skills, todo, tts, cronjob]
#
#   # Restrictive: Discord gets read-only tools only:
#   platform_toolsets:
#     discord: [web, vision, skills, todo]
#
# If not set, defaults are:
#   cli:           hermes-cli            (everything + cronjob management)
#   telegram:      hermes-telegram       (terminal, file, web, vision, image, tts, browser, skills, todo, cronjob, messaging)
#   discord:       hermes-discord        (same as telegram)
#   whatsapp:      hermes-whatsapp       (same as telegram)
#   slack:         hermes-slack          (same as telegram)
#   signal:        hermes-signal         (same as telegram)
#   homeassistant: hermes-homeassistant  (same as telegram)
#   qqbot:         hermes-qqbot          (same as telegram)
#
platform_toolsets:
  # One toolset (or preset) per list item; add more items to compose your own.
  cli:
    - hermes-cli
  telegram:
    - hermes-telegram
  discord:
    - hermes-discord
  whatsapp:
    - hermes-whatsapp
  slack:
    - hermes-slack
  signal:
    - hermes-signal
  homeassistant:
    - hermes-homeassistant
  qqbot:
    - hermes-qqbot

# =============================================================================
# Gateway Platform Settings
# =============================================================================
# Optional per-platform messaging settings.
# Platform-specific knobs live under `extra`.
#
# platforms:
#   telegram:
#     reply_to_mode: "first"  # off | first | all
#     extra:
#       disable_link_previews: false  # Set true to suppress Telegram URL previews in bot messages

# ─────────────────────────────────────────────────────────────────────────────
# Available toolsets (use these names in platform_toolsets or the toolsets list)
#
# Run `hermes chat --list-toolsets` to see all toolsets and their tools.
# Run `hermes chat --list-tools` to see every individual tool with descriptions.
# ─────────────────────────────────────────────────────────────────────────────
#
# INDIVIDUAL TOOLSETS (compose your own):
#   web          - web_search, web_extract
#   search       - web_search only (no scraping)
#   terminal     - terminal, process
#   file         - read_file, write_file, patch, search
#   browser      - browser_navigate, browser_snapshot, browser_click, browser_type,
#                  browser_scroll, browser_back, browser_press,
#                  browser_get_images, browser_vision  (requires BROWSERBASE_API_KEY)
#   vision       - vision_analyze  (requires OPENROUTER_API_KEY)
#   image_gen    - image_generate  (requires FAL_KEY)
#   skills       - skills_list, skill_view
#   skills_hub   - skill_hub (search/install/manage from online registries — user-driven only)
#   moa          - mixture_of_agents  (requires OPENROUTER_API_KEY)
#   todo         - todo (in-memory task planning, no deps)
#   tts          - text_to_speech  (Edge TTS free, or ELEVENLABS/OPENAI/MINIMAX/MISTRAL key)
#   cronjob      - cronjob (create/list/update/pause/resume/run/remove scheduled tasks)
#   rl           - rl_list_environments, rl_start_training, etc. (requires TINKER_API_KEY)
#
# PRESETS (curated bundles):
#   hermes-cli       - All of the above except rl + send_message
#   hermes-telegram  - terminal, file, web, vision, image_gen, tts, browser,
#                      skills, todo, cronjob, send_message
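#   hermes-discord   - Same as hermes-telegram
#   hermes-whatsapp  - Same as hermes-telegram
#   hermes-slack     - Same as hermes-telegram
#   hermes-signal    - Same as hermes-telegram
#   hermes-homeassistant - Same as hermes-telegram
#   hermes-qqbot     - Same as hermes-telegram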
#
# COMPOSITE:
#   debugging    - terminal + web + file
#   safe         - web + vision + moa (no terminal access)
#   all          - Everything available
#
# TOOLSET DESCRIPTIONS (what each toolset is for):
#   web          - Web search and content extraction (web_search, web_extract)
#   search       - Web search only, no scraping (web_search)
#   terminal     - Command execution and process management (terminal, process)
#   file         - File operations: read, write, patch, search
#   browser      - Full browser automation (navigate, click, type, screenshot, etc.)
#   vision       - Image analysis (vision_analyze)
#   image_gen    - Image generation with FLUX (image_generate)
#   skills       - Load skill documents (skills_list, skill_view)
#   moa          - Mixture of Agents reasoning (mixture_of_agents)
#   todo         - Task planning and tracking for multi-step work
#   memory       - Persistent memory across sessions (personal notes + user profile)
#   session_search - Search and recall past conversations (FTS5 + Gemini Flash summarization)
#   tts          - Text-to-speech (Edge TTS free, ElevenLabs, OpenAI, MiniMax, Mistral)
#   cronjob      - Schedule and manage automated tasks (CLI-only)
#   rl           - RL training tools (Tinker-Atropos)
#
# Composite toolsets:
#   debugging    - terminal + web + file (for troubleshooting)
#   safe         - web + vision + moa (no terminal access)

# NOTE: The top-level "toolsets" key is deprecated and ignored.
# Tool configuration is managed per-platform via platform_toolsets above.
# Use `hermes tools` to configure interactively, or edit platform_toolsets directly.
#
# CLI override: hermes chat --toolsets terminal,web,file

# =============================================================================
# MCP (Model Context Protocol) Servers
# =============================================================================
# Connect to external MCP servers to add tools from the MCP ecosystem.
# Each server's tools are automatically discovered and registered.
# See docs/mcp.md for full documentation.
#
# Stdio servers (spawn a subprocess):
#   command: the executable to run
#   args: command-line arguments
#   env: environment variables (only these + safe defaults passed to subprocess)
#
# HTTP servers (connect to a URL):
#   url: the MCP server endpoint
#   headers: HTTP headers (e.g., for authentication)
#
# Optional per-server settings:
#   timeout: tool call timeout in seconds (default: 120)
#   connect_timeout: initial connection timeout (default: 60)
#
# mcp_servers:
#   time:
#     command: uvx
#     args: ["mcp-server-time"]
#   filesystem:
#     command: npx
#     args: ["-y", "@modelcontextprotocol/server-filesystem", "/home/user"]
#   notion:
#     url: https://mcp.notion.com/mcp
#   github:
#     command: npx
#     args: ["-y", "@modelcontextprotocol/server-github"]
#     env:
#       GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_..."
#
# Sampling (server-initiated LLM requests) — enabled by default.
# Per-server config under the 'sampling' key:
#   analysis:
#     command: npx
#     args: ["-y", "analysis-server"]
#     sampling:
#       enabled: true           # default: true
#       model: "gemini-3-flash" # override model (optional)
#       max_tokens_cap: 4096    # max tokens per request
#       timeout: 30             # LLM call timeout (seconds)
#       max_rpm: 10             # max requests per minute
#       allowed_models: []      # model whitelist (empty = all)
#       max_tool_rounds: 5      # tool loop limit (0 = disable)
#       log_level: "info"       # audit verbosity

# =============================================================================
# Voice Transcription (Speech-to-Text)
# =============================================================================
# Automatically transcribe voice messages on messaging platforms.
# Providers: local (free, faster-whisper) | groq (free tier) | openai (Whisper API) | mistral (Voxtral Transcribe)
# Set the corresponding API key in .env: GROQ_API_KEY, OPENAI_API_KEY, or MISTRAL_API_KEY.
stt:
  enabled: true
  # provider: "local"          # auto-detected if omitted
  # Per-provider settings; section names match the providers listed above.
  local:
    model: "base"              # tiny | base | small | medium | large-v3 | turbo
    # language: ""             # auto-detect; set to "en", "es", "fr", etc. to force
  openai:
    model: "whisper-1"         # whisper-1 | gpt-4o-mini-transcribe | gpt-4o-transcribe
  # Uncomment to choose the Voxtral model for the mistral provider.
  # mistral:
  #   model: "voxtral-mini-latest"  # voxtral-mini-latest | voxtral-mini-2602

# =============================================================================
# Response Pacing (Messaging Platforms)
# =============================================================================
# Add human-like delays between message chunks.
# human_delay:
#   mode: "off"      # "off" | "natural" | "custom"
#   min_ms: 800      # Min delay (custom mode only)
#   max_ms: 2500     # Max delay (custom mode only)

# =============================================================================
# Session Logging
# =============================================================================
# Session trajectories are automatically saved to logs/ directory.
# Each session creates: logs/session_YYYYMMDD_HHMMSS_UUID.json
#
# The session ID is displayed in the welcome banner for easy reference.
# Logs contain full conversation history in trajectory format:
# - System prompt, user messages, assistant responses
# - Tool calls with inputs/outputs
# - Timestamps for debugging
#
# No configuration needed - logging is always enabled.
# To disable, you would need to modify the source code.

# =============================================================================
# Code Execution Sandbox (Programmatic Tool Calling)
# =============================================================================
# The execute_code tool runs Python scripts that call Hermes tools via RPC.
# Intermediate tool results stay out of the LLM's context window.
code_execution:
  # Both values below are set to their documented defaults.
  timeout: 300         # Max seconds per script before kill (default: 300 = 5 min)
  max_tool_calls: 50   # Max RPC tool calls per execution (default: 50)

# =============================================================================
# Subagent Delegation
# =============================================================================
# The delegate_task tool spawns child agents with isolated context.
# Supports single tasks and batch mode (default 3 parallel, configurable).
delegation:
  # Commented-out keys below are optional overrides; uncomment a line to set it.
  max_iterations: 50                          # Max tool-calling turns per child (default: 50)
  # max_concurrent_children: 3                # Max parallel child agents (default: 3)
  # max_spawn_depth: 1                        # Tree depth cap (1-3, default: 1 = flat).
  #                                           # Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
  # orchestrator_enabled: true                # Kill switch for role="orchestrator" children (default: true).
  # inherit_mcp_toolsets: true                # When explicit child toolsets are narrowed, also keep the parent's
  #                                           # MCP toolsets (default: true). Set false for strict intersection.
  # model: "google/gemini-3-flash-preview"    # Override model for subagents (empty = inherit parent)
  # provider: "openrouter"                    # Override provider for subagents (empty = inherit parent)
  #                                           # Resolves full credentials (base_url, api_key) automatically.
  #                                           # Supported: openrouter, nous, zai, kimi-coding, minimax

# =============================================================================
# Honcho Integration (Cross-Session User Modeling)
# =============================================================================
# AI-native persistent memory via Honcho (https://honcho.dev/).
# Builds a deeper understanding of the user across sessions and tools.
# Runs alongside USER.md — additive, not a replacement.
#
# Requires: pip install honcho-ai
# Config: ~/.honcho/config.json (shared with Claude Code, Cursor, etc.)
# API key: HONCHO_API_KEY in ~/.hermes/.env or ~/.honcho/config.json
#
# Hermes-specific overrides (optional — most config comes from ~/.honcho/config.json):
# honcho: {}

# =============================================================================
# Display
# =============================================================================
display:
  # Use compact banner mode
  compact: false

  # Tool progress display level (CLI and gateway)
  #   off:     Silent — no tool activity shown, just the final response
  #   new:     Show a tool indicator only when the tool changes (skip repeats)
  #   all:     Show every tool call with a short preview (default)
  #   verbose: Full args, results, and debug logs (same as /verbose)
  # Toggle at runtime with /verbose in the CLI
  tool_progress: all

  # Gateway-only natural mid-turn assistant updates.
  # When true, completed assistant status messages are sent as separate chat
  # messages. This is independent of tool_progress and gateway streaming.
  interim_assistant_messages: true

  # What Enter does when Hermes is already busy in the CLI.
  #   interrupt: Interrupt the current run and redirect Hermes (default)
  #   queue:     Queue your message for the next turn
  # Ctrl+C always interrupts regardless of this setting.
  busy_input_mode: interrupt

  # Background process notifications (gateway/messaging only).
  # Controls how chatty the process watcher is when you use
  # terminal(background=true, notify_on_complete=true) from Telegram/Discord/etc.
  #   off:     No watcher messages at all
  #   result:  Only the final completion message
  #   error:   Only the final message when exit code != 0
  #   all:     Running output updates + final message (default)
  background_process_notifications: all


  # Play terminal bell when agent finishes a response.
  # Useful for long-running tasks — your terminal will ding when the agent is done.
  # Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
  bell_on_complete: false

  # Show model reasoning/thinking before each response.
  # When enabled, a dim box shows the model's thought process above the response.
  # Toggle at runtime with /reasoning show or /reasoning hide.
  show_reasoning: false

  # Stream tokens to the terminal as they arrive instead of waiting for the
  # full response. The response box opens on first token and text appears
  # line-by-line. Tool calls are still captured silently.
  # Set to false to wait for the full response before anything is printed.
  streaming: true

  # ───────────────────────────────────────────────────────────────────────────
  # Skin / Theme
  # ───────────────────────────────────────────────────────────────────────────
  # Customize CLI visual appearance — banner colors, spinner faces, tool prefix,
  # response box label, and branding text. Change at runtime with /skin <name>.
  #
  # Built-in skins:
  #   default  — Classic Hermes gold/kawaii
  #   ares     — Crimson/bronze war-god theme with spinner wings
  #   mono     — Clean grayscale monochrome
  #   slate    — Cool blue developer-focused
  #
  # Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
  # Schema (all fields optional, missing values inherit from default):
  #
  #   name: my-theme
  #   description: Short description
  #   colors:
  #     banner_border: "#HEX"    # Panel border
  #     banner_title: "#HEX"     # Panel title
  #     banner_accent: "#HEX"    # Section headers (Available Tools, etc.)
  #     banner_dim: "#HEX"       # Dim/muted text
  #     banner_text: "#HEX"      # Body text (tool names, skill names)
  #     ui_accent: "#HEX"        # UI accent color
  #     response_border: "#HEX"  # Response box border color
  #   spinner:
  #     waiting_faces: ["(⚔)", "(⛨)"]       # Faces shown while waiting
  #     thinking_faces: ["(⚔)", "(⌁)"]      # Faces shown while thinking
  #     thinking_verbs: ["forging", "plotting"]  # Verbs for spinner messages
  #     wings:                                # Optional left/right spinner decorations
  #       - ["⟪⚔", "⚔⟫"]
  #       - ["⟪▲", "▲⟫"]
  #   branding:
  #     agent_name: "My Agent"               # Banner title and branding
  #     welcome: "Welcome message"           # Shown at CLI startup
  #     response_label: " ⚔ Agent "         # Response box header label
  #     prompt_symbol: "⚔ ❯ "              # Prompt symbol
  #   tool_prefix: "╎"                       # Tool output line prefix (default: ┊)
  #
  skin: default

# =============================================================================
# Model Aliases — short names for /model command
# =============================================================================
# Map short aliases to exact (model, provider, base_url) tuples.
# Used by /model tab completion and resolve_alias().
# Aliases are checked BEFORE the models.dev catalog, so they can route
# to endpoints not in the catalog (e.g. Ollama Cloud, local servers).
#
# model_aliases:
#   opus:
#     model: claude-opus-4-6
#     provider: anthropic
#   qwen:
#     model: "qwen3.5:397b"
#     provider: custom
#     base_url: "https://ollama.com/v1"
#   glm:
#     model: glm-4.7
#     provider: custom
#     base_url: "https://ollama.com/v1"

# =============================================================================
# Privacy
# =============================================================================
# privacy:
#   # Redact PII from the LLM context prompt.
#   # When true, phone numbers are stripped and user/chat IDs are replaced
#   # with deterministic hashes before being sent to the model.
#   # Names and usernames are NOT affected (user-chosen, publicly visible).
#   # Routing/delivery still uses the original values internally.
#   redact_pii: false

# =============================================================================
# Shell-script hooks
# =============================================================================
# Register shell scripts as plugin-hook callbacks.  Each entry is executed as
# a subprocess (shell=False, shlex.split) with a JSON payload on stdin.  On
# stdout the script may return JSON that either blocks the tool call or
# injects context into the next LLM call.
#
# Valid events (mirror hermes_cli.plugins.VALID_HOOKS):
#   pre_tool_call, post_tool_call, pre_llm_call, post_llm_call,
#   pre_api_request, post_api_request, on_session_start, on_session_end,
#   on_session_finalize, on_session_reset, subagent_stop
#
# First-use consent: each (event, command) pair prompts once on a TTY, then
# is persisted to ~/.hermes/shell-hooks-allowlist.json.  Non-interactive
# runs (gateway, cron) need --accept-hooks, HERMES_ACCEPT_HOOKS=1, or the
# hooks_auto_accept key below.
#
# See website/docs/user-guide/features/hooks.md for the full JSON wire
# protocol and worked examples.
#
# hooks:
#   pre_tool_call:
#     - matcher: "terminal"
#       command: "~/.hermes/agent-hooks/block-rm-rf.sh"
#       timeout: 10
#   post_tool_call:
#     - matcher: "write_file|patch"
#       command: "~/.hermes/agent-hooks/auto-format.sh"
#   pre_llm_call:
#     - command: "~/.hermes/agent-hooks/inject-cwd-context.sh"
#   subagent_stop:
#     - command: "~/.hermes/agent-hooks/log-orchestration.sh"
#
# hooks_auto_accept: false
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								# Hermes Agent CLI Configuration
 								# Copy this file to cli-config.yaml and customize as needed.
 								# This file configures the CLI behavior. Environment variables in .env take precedence.
 								# =============================================================================
 								# Model Configuration
 								# =============================================================================
 								model:
 								  # Default model to use (can be overridden with --model flag)
-												fix(config): accept 'model' key as alias for 'default' in model config (#3603)

Users intuitively write model: { model: my-model } instead of
model: { default: my-model } and it silently falls back to the
hardcoded default. Now both spellings work across all three config
consumers: runtime_provider, CLI, and gateway.

Co-authored-by: ygd58 <ygd58@users.noreply.github.com>
											
										
										
											2026-03-28 14:55:27 -07:00
+								  # Both "default" and "model" work as the key name here.
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
+								  default: "anthropic/claude-opus-4.6"
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
-												feat: add multi-provider authentication and inference provider selection

- Implemented a multi-provider authentication system for the Hermes Agent, supporting OAuth for Nous Portal and traditional API key methods for OpenRouter and custom endpoints.
- Enhanced CLI with commands for logging in and out of providers, allowing users to authenticate and manage their credentials easily.
- Updated configuration options to select inference providers, with detailed documentation on usage and setup.
- Improved status reporting to include authentication status and provider details, enhancing user awareness of their current configuration.
- Added new files for authentication handling and updated existing components to integrate the new provider system.

											
										
										
											2026-02-20 17:24:00 -08:00
+								  # Inference provider selection:
-												fix(auth): stop silently falling back to OpenRouter when no provider is configured (#3862)

Previously, when no API keys or provider credentials were found, Hermes
silently defaulted to OpenRouter + Claude Opus. This caused confusion
when users configured local servers (LM Studio, Ollama, etc.) with a
typo or unrecognized provider name — the system would silently route to
OpenRouter instead of telling them something was wrong.

Changes:
- resolve_provider() now raises AuthError when no credentials are found
  instead of returning 'openrouter' as a silent fallback
- Added local server aliases: lmstudio, ollama, vllm, llamacpp → custom
- Removed hardcoded 'anthropic/claude-opus-4.6' fallback from gateway
  and cron scheduler (they read from config.yaml instead)
- Updated cli-config.yaml.example with complete provider documentation
  including all supported providers, aliases, and local server setup
											
										
										
											2026-03-29 21:06:35 -07:00
+								  #   "auto"         - Auto-detect from credentials (default)
 								  #   "openrouter"   - OpenRouter (requires: OPENROUTER_API_KEY or OPENAI_API_KEY)
 								  #   "nous"         - Nous Portal OAuth (requires: hermes login)
 								  #   "nous-api"     - Nous Portal API key (requires: NOUS_API_KEY)
 								  #   "anthropic"    - Direct Anthropic API (requires: ANTHROPIC_API_KEY)
-												docs: update openai-codex setup reference (#10471)

Fixes stale openai-codex onboarding reference in cli-config.yaml.example
											
										
										
											2026-04-15 17:37:05 -07:00
+								  #   "openai-codex" - OpenAI Codex (requires: hermes auth)
-												fix(auth): stop silently falling back to OpenRouter when no provider is configured (#3862)

Previously, when no API keys or provider credentials were found, Hermes
silently defaulted to OpenRouter + Claude Opus. This caused confusion
when users configured local servers (LM Studio, Ollama, etc.) with a
typo or unrecognized provider name — the system would silently route to
OpenRouter instead of telling them something was wrong.

Changes:
- resolve_provider() now raises AuthError when no credentials are found
  instead of returning 'openrouter' as a silent fallback
- Added local server aliases: lmstudio, ollama, vllm, llamacpp → custom
- Removed hardcoded 'anthropic/claude-opus-4.6' fallback from gateway
  and cron scheduler (they read from config.yaml instead)
- Updated cli-config.yaml.example with complete provider documentation
  including all supported providers, aliases, and local server setup
											
										
										
											2026-03-29 21:06:35 -07:00
+								  #   "copilot"      - GitHub Copilot / GitHub Models (requires: GITHUB_TOKEN)
-												feat(providers): add Google AI Studio (Gemini) as a first-class provider

Cherry-picked from PR #5494 by kshitijk4poor.
Adds native Gemini support via Google's OpenAI-compatible endpoint.
Zero new dependencies.

											
										
										
											2026-04-06 10:14:01 -07:00
+								  #   "gemini"      - Use Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
 								  #   "zai"         - Use z.ai / ZhipuAI GLM models (requires: GLM_API_KEY)
-												fix(auth): stop silently falling back to OpenRouter when no provider is configured (#3862)

Previously, when no API keys or provider credentials were found, Hermes
silently defaulted to OpenRouter + Claude Opus. This caused confusion
when users configured local servers (LM Studio, Ollama, etc.) with a
typo or unrecognized provider name — the system would silently route to
OpenRouter instead of telling them something was wrong.

Changes:
- resolve_provider() now raises AuthError when no credentials are found
  instead of returning 'openrouter' as a silent fallback
- Added local server aliases: lmstudio, ollama, vllm, llamacpp → custom
- Removed hardcoded 'anthropic/claude-opus-4.6' fallback from gateway
  and cron scheduler (they read from config.yaml instead)
- Updated cli-config.yaml.example with complete provider documentation
  including all supported providers, aliases, and local server setup
											
										
										
											2026-03-29 21:06:35 -07:00
+								  #   "kimi-coding"  - Kimi / Moonshot AI (requires: KIMI_API_KEY)
 								  #   "minimax"      - MiniMax global (requires: MINIMAX_API_KEY)
 								  #   "minimax-cn"   - MiniMax China (requires: MINIMAX_CN_API_KEY)
 								  #   "huggingface"  - Hugging Face Inference (requires: HF_TOKEN)
-												fix(providers): complete NVIDIA NIM parity with other providers

Follow-up on the native NVIDIA NIM provider salvage. The original PR wired
PROVIDER_REGISTRY + HERMES_OVERLAYS correctly but missed several touchpoints
required for full parity with other OpenAI-compatible providers (xai,
huggingface, deepseek, zai).

Gaps closed:

- hermes_cli/main.py:
  - Add 'nvidia' to the _model_flow_api_key_provider dispatch tuple so
    selecting 'NVIDIA NIM' in `hermes model` actually runs the api-key
    provider flow (previously fell through silently).
  - Add 'nvidia' to `hermes chat --provider` argparse choices so the
    documented test command (`hermes chat --provider nvidia --model ...`)
    parses successfully.

- hermes_cli/config.py: Register NVIDIA_API_KEY and NVIDIA_BASE_URL in
  OPTIONAL_ENV_VARS so setup wizard can prompt for them and they're
  auto-added to the subprocess env blocklist.

- hermes_cli/doctor.py: Add NVIDIA NIM row to `_apikey_providers` so
  `hermes doctor` probes https://integrate.api.nvidia.com/v1/models.

- hermes_cli/dump.py: Add NVIDIA_API_KEY → 'nvidia' mapping for
  `hermes dump` credential masking.

- tests/tools/test_local_env_blocklist.py: Extend registry_vars fixture
  with NVIDIA_API_KEY to verify it's blocked from leaking into subprocesses.

- agent/model_metadata.py: Add 'nemotron' → 131072 context-length entry
  so all Nemotron variants get 128K context via substring match (rather
  than falling back to MINIMUM_CONTEXT_LENGTH).

- hermes_cli/models.py: Fix hallucinated model ID
  'nvidia/nemotron-3-nano-8b-a4b' → 'nvidia/nemotron-3-nano-30b-a3b'
  (verified against live integrate.api.nvidia.com/v1/models catalog).
  Expand curated list from 5 to 9 agentic models mapping to OpenRouter
  defaults per provider-guide convention: add qwen3.5-397b-a17b,
  deepseek-v3.2, llama-3.3-nemotron-super-49b-v1.5, gpt-oss-120b.

- cli-config.yaml.example: Document 'nvidia' provider option.

- scripts/release.py: Map asurla@nvidia.com → anniesurla in AUTHOR_MAP
  for CI attribution.

E2E verified: `hermes chat --provider nvidia ...` now reaches NVIDIA's
endpoint (returns 401 with bogus key instead of argparse error);
`hermes doctor` detects NVIDIA NIM when NVIDIA_API_KEY is set.

											
										
										
											2026-04-17 13:09:14 -07:00
+								  #   "nvidia"       - NVIDIA NIM / build.nvidia.com (requires: NVIDIA_API_KEY)
-												feat(xiaomi): add Xiaomi MiMo as first-class provider

Cherry-picked from PR #7702 by kshitijk4poor.

Adds Xiaomi MiMo as a direct provider (XIAOMI_API_KEY) with models:
- mimo-v2-pro (1M context), mimo-v2-omni (256K, multimodal), mimo-v2-flash (256K, cheapest)

Standard OpenAI-compatible provider checklist: auth.py, config.py, models.py,
main.py, providers.py, doctor.py, model_normalize.py, model_metadata.py,
models_dev.py, auxiliary_client.py, .env.example, cli-config.yaml.example.

Follow-up: vision tasks use mimo-v2-omni (multimodal) instead of the user's
main model. Non-vision aux uses the user's selected model. Added
_PROVIDER_VISION_MODELS dict for provider-specific vision model overrides.
On failure, falls back to aggregators (gemini flash) via existing fallback chain.

Corrects pre-existing context lengths: mimo-v2-pro 1048576→1000000,
mimo-v2-omni 1048576→256000, adds mimo-v2-flash 256000.

36 tests covering registry, aliases, auto-detect, credentials, models.dev,
normalization, URL mapping, providers module, doctor, aux client, vision
model override, and agent init.

											
										
										
											2026-04-11 10:10:31 -07:00
+								  #   "xiaomi"       - Xiaomi MiMo (requires: XIAOMI_API_KEY)
-												feat(providers): add Arcee AI as direct API provider

Adds Arcee AI as a standard direct provider (ARCEEAI_API_KEY) with
Trinity models: trinity-large-thinking, trinity-large-preview, trinity-mini.

Standard OpenAI-compatible provider checklist: auth.py, config.py,
models.py, main.py, providers.py, doctor.py, model_normalize.py,
model_metadata.py, setup.py, trajectory_compressor.py.

Based on PR #9274 by arthurbr11, simplified to a standard direct
provider without dual-endpoint OpenRouter routing.

											
										
										
											2026-04-13 17:16:43 -07:00
+								  #   "arcee"        - Arcee AI Trinity models (requires: ARCEEAI_API_KEY)
-												feat: add Ollama Cloud as built-in provider

Add ollama-cloud as a first-class provider with full parity to existing
API-key providers (gemini, zai, minimax, etc.):

- PROVIDER_REGISTRY entry with OLLAMA_API_KEY env var
- Provider aliases: ollama -> custom (local), ollama_cloud -> ollama-cloud
- models.dev integration for accurate context lengths
- URL-to-provider mapping (ollama.com -> ollama-cloud)
- Passthrough model normalization (preserves Ollama model:tag format)
- Default auxiliary model (nemotron-3-nano:30b)
- HermesOverlay in providers.py
- CLI --provider choices, CANONICAL_PROVIDERS entry
- Dynamic model discovery with disk caching (1hr TTL)
- 37 provider-specific tests

Cherry-picked from PR #6038 by kshitijk4poor. Closes #3926

											
										
										
											2026-04-15 22:32:05 -07:00
+								  #   "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
-												fix(auth): stop silently falling back to OpenRouter when no provider is configured (#3862)

Previously, when no API keys or provider credentials were found, Hermes
silently defaulted to OpenRouter + Claude Opus. This caused confusion
when users configured local servers (LM Studio, Ollama, etc.) with a
typo or unrecognized provider name — the system would silently route to
OpenRouter instead of telling them something was wrong.

Changes:
- resolve_provider() now raises AuthError when no credentials are found
  instead of returning 'openrouter' as a silent fallback
- Added local server aliases: lmstudio, ollama, vllm, llamacpp → custom
- Removed hardcoded 'anthropic/claude-opus-4.6' fallback from gateway
  and cron scheduler (they read from config.yaml instead)
- Updated cli-config.yaml.example with complete provider documentation
  including all supported providers, aliases, and local server setup
											
										
										
											2026-03-29 21:06:35 -07:00
+								  #   "kilocode"     - KiloCode gateway (requires: KILOCODE_API_KEY)
 								  #   "ai-gateway"   - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
 								  #
 								  # Local servers (LM Studio, Ollama, vLLM, llama.cpp):
 								  #   "custom"       - Any OpenAI-compatible endpoint. Set base_url below.
 								  #   Aliases: "lmstudio", "ollama", "vllm", "llamacpp" all map to "custom".
 								  #   Example for LM Studio:
 								  #     provider: "lmstudio"
 								  #     base_url: "http://localhost:1234/v1"
 								  #   No API key needed — local servers typically ignore auth.
 								  #
-												feat: add multi-provider authentication and inference provider selection

- Implemented a multi-provider authentication system for the Hermes Agent, supporting OAuth for Nous Portal and traditional API key methods for OpenRouter and custom endpoints.
- Enhanced CLI with commands for logging in and out of providers, allowing users to authenticate and manage their credentials easily.
- Updated configuration options to select inference providers, with detailed documentation on usage and setup.
- Improved status reporting to include authentication status and provider details, enhancing user awareness of their current configuration.
- Added new files for authentication handling and updated existing components to integrate the new provider system.

											
										
										
											2026-02-20 17:24:00 -08:00
+								  # Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
 								  provider: "auto"
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								  # API configuration (falls back to OPENROUTER_API_KEY env var)
 								  # api_key: "your-key-here"  # Uncomment to set here instead of .env
 								  base_url: "https://openrouter.ai/api/v1"
-												fix(compaction): don't halve context_length on output-cap-too-large errors

When the API returns "max_tokens too large given prompt" (input tokens
are within the context window, but input + requested output > window),
the old code incorrectly routed through the same handler as "prompt too
long" errors, calling get_next_probe_tier() and permanently halving
context_length. This made things worse: the window was fine, only the
requested output size needed trimming for that one call.

Two distinct error classes now handled separately:

  Prompt too long  — input itself exceeds context window.
    Fix: compress history + halve context_length (existing behaviour,
    unchanged).

  Output cap too large — input OK, but input + max_tokens > window.
    Fix: parse available_tokens from the error message, set a one-shot
    _ephemeral_max_output_tokens override for the retry, and leave
    context_length completely untouched.

Changes:
- agent/model_metadata.py: add parse_available_output_tokens_from_error()
  that detects Anthropic's "available_tokens: N" error format and returns
  the available output budget, or None for all other error types.
- run_agent.py: call the new parser first in the is_context_length_error
  block; if it fires, set _ephemeral_max_output_tokens (with a 64-token
  safety margin) and break to retry without touching context_length.
  _build_api_kwargs consumes the ephemeral value exactly once then clears
  it so subsequent calls use self.max_tokens normally.
- agent/anthropic_adapter.py: expand build_anthropic_kwargs docstring to
  clearly document the max_tokens (output cap) vs context_length (total
  window) distinction, which is a persistent source of confusion due to
  the OpenAI-inherited "max_tokens" name.
- cli-config.yaml.example: add inline comments explaining both keys side
  by side where users are most likely to look.
- website/docs/integrations/providers.md: add a callout box at the top
  of "Context Length Detection" and clarify the troubleshooting entry.
- tests/test_ctx_halving_fix.py: 24 tests across four classes covering
  the parser, build_anthropic_kwargs clamping, ephemeral one-shot
  consumption, and the invariant that context_length is never mutated
  on output-cap errors.

											
										
										
											2026-04-09 16:54:23 +02:00
+								  # ── Token limits — two settings, easy to confuse ──────────────────────────
 								  #
 								  # context_length: TOTAL context window (input + output tokens combined).
 								  #   Controls when Hermes compresses history and validates requests.
 								  #   Leave unset — Hermes auto-detects the correct value from the provider.
 								  #   Set manually only when auto-detection is wrong (e.g. a local server with
 								  #   a custom num_ctx, or a proxy that doesn't expose /v1/models).
 								  #
 								  # context_length: 131072
 								  #
 								  # max_tokens: OUTPUT cap — maximum tokens the model may generate per response.
 								  #   Unrelated to how long your conversation history can be.
 								  #   The OpenAI-standard name "max_tokens" is a misnomer; OpenAI's newer APIs
 								  #   have since renamed it "max_completion_tokens" / "max_output_tokens" for clarity.
 								  #   Leave unset to use the model's native output ceiling (recommended).
 								  #   Set only if you want to deliberately limit individual response length.
 								  #
-												feat(providers): add per-provider and per-model request_timeout_seconds config

Adds optional providers.<id>.request_timeout_seconds and
providers.<id>.models.<model>.timeout_seconds config, resolved via a new
hermes_cli/timeouts.py helper and applied where client_kwargs is built
in run_agent.py. Zero default behavior change: when both keys are unset,
the openai SDK default takes over.

Mirrors the existing _get_task_timeout pattern in agent/auxiliary_client.py
for auxiliary tasks - the primary turn path just never got the equivalent
knob.

Cross-project demand: openclaw/openclaw#43946 (17 reactions) asks for
exactly this config - specifically calls out Ollama cold-start hanging
the client.

											
										
										
											2026-04-18 21:38:31 -07:00
+								# max_tokens: 8192
 								# Named provider overrides (optional)
-												fix(config): add stale timeout settings

											
										
										
											2026-04-19 13:40:09 -06:00
+								# Use this for per-provider request timeouts, non-streaming stale timeouts,
 								# and per-model exceptions.
-												feat(providers): extend request_timeout_seconds to all client paths

Follow-up on top of mvanhorn's cherry-picked commit. Original PR only
wired request_timeout_seconds into the explicit-creds OpenAI branch at
run_agent.py init; router-based implicit auth, native Anthropic, and the
fallback chain were still hardcoded to SDK defaults.

- agent/anthropic_adapter.py: build_anthropic_client() accepts an optional
  timeout kwarg (default 900s preserved when unset/invalid).
- run_agent.py: resolve per-provider/per-model timeout once at init; apply
  to Anthropic native init + post-refresh rebuild + stale/interrupt
  rebuilds + switch_model + _restore_primary_runtime + the OpenAI
  implicit-auth path + _try_activate_fallback (with immediate client
  rebuild so the first fallback request carries the configured timeout).
- tests: cover anthropic adapter kwarg honoring; widen mock signatures
  to accept the new timeout kwarg.
- docs/example: clarify that the knob now applies to every transport,
  the fallback chain, and rebuilds after credential rotation.

											
										
										
											2026-04-19 05:41:29 -07:00
+								# Applies to the primary turn client on every api_mode (OpenAI-wire, native
 								# Anthropic, and Anthropic-compatible providers), the fallback chain, and
-												feat(providers): enforce request_timeout_seconds on OpenAI-wire primary calls

Live test with timeout_seconds: 0.5 on claude-sonnet-4.6 proved the
initial wiring was insufficient: run_agent.py was overriding the
client-level timeout on every call via hardcoded per-request kwargs.

Root cause: run_agent.py had two sites that pass an explicit timeout=
kwarg into chat.completions.create() — api_kwargs['timeout'] at line
7075 (HERMES_API_TIMEOUT=1800s default) and the streaming path's
_httpx.Timeout(..., read=HERMES_STREAM_READ_TIMEOUT=120s, ...) at line
5760. Both override the per-provider config value the client was
constructed with, so a 0.5s config timeout would silently not enforce.

This commit:
- Adds AIAgent._resolved_api_call_timeout() — config > HERMES_API_TIMEOUT env > 1800s default.
- Uses it for the non-streaming api_kwargs['timeout'] field.
- Uses it for the streaming path's httpx.Timeout(connect, read, write, pool)
  so both connect and read respect the configured value when set.
  Local-provider auto-bump (Ollama/vLLM cold-start) only applies when
  no explicit config value is set.
- New test: test_resolved_api_call_timeout_priority covers all three
  precedence cases (config, env, default).

Live verified: 0.5s config on claude-sonnet-4.6 now triggers
APITimeoutError at ~3s per retry, exhausts 3 retries in ~15s total
(was: 29-47s success with timeout ignored). Positive case (60s config
+ gpt-4o-mini) still succeeds at 1.3s.

											
										
										
											2026-04-19 11:10:47 -07:00
+								# client rebuilds during credential rotation.  For OpenAI-wire chat
 								# completions (streaming and non-streaming) the configured value is also
 								# used as the per-request ``timeout=`` kwarg so it wins over the legacy
 								# HERMES_API_TIMEOUT env var (which still applies when no config is set).
-												fix(config): add stale timeout settings

											
										
										
											2026-04-19 13:40:09 -06:00
+								# ``stale_timeout_seconds`` controls the non-streaming stale-call detector and
 								# wins over the legacy HERMES_API_CALL_STALE_TIMEOUT env var. Leaving these
 								# unset keeps the legacy defaults (HERMES_API_TIMEOUT=1800s,
 								# HERMES_API_CALL_STALE_TIMEOUT=300s, native Anthropic 900s).
-												feat(providers): add per-provider and per-model request_timeout_seconds config

Adds optional providers.<id>.request_timeout_seconds and
providers.<id>.models.<model>.timeout_seconds config, resolved via a new
hermes_cli/timeouts.py helper and applied where client_kwargs is built
in run_agent.py. Zero default behavior change: when both keys are unset,
the openai SDK default takes over.

Mirrors the existing _get_task_timeout pattern in agent/auxiliary_client.py
for auxiliary tasks - the primary turn path just never got the equivalent
knob.

Cross-project demand: openclaw/openclaw#43946 (17 reactions) asks for
exactly this config - specifically calls out Ollama cold-start hanging
the client.

											
										
										
											2026-04-18 21:38:31 -07:00
+								#
-												docs(providers): call out Bedrock as not covered by request_timeout_seconds

AWS Bedrock paths (bedrock_converse + AnthropicBedrock SDK) use boto3
with its own timeout config and are not wired to the per-provider knob.
Documented in cli-config.yaml.example and website configuration.md so
users don't expect it to take effect there.

											
										
										
											2026-04-19 11:19:37 -07:00
+								# Not currently wired for AWS Bedrock (bedrock_converse + AnthropicBedrock
 								# SDK paths) — those use boto3 with its own timeout configuration.
 								#
-												feat(providers): add per-provider and per-model request_timeout_seconds config

Adds optional providers.<id>.request_timeout_seconds and
providers.<id>.models.<model>.timeout_seconds config, resolved via a new
hermes_cli/timeouts.py helper and applied where client_kwargs is built
in run_agent.py. Zero default behavior change: when both keys are unset,
the openai SDK default takes over.

Mirrors the existing _get_task_timeout pattern in agent/auxiliary_client.py
for auxiliary tasks - the primary turn path just never got the equivalent
knob.

Cross-project demand: openclaw/openclaw#43946 (17 reactions) asks for
exactly this config - specifically calls out Ollama cold-start hanging
the client.

											
										
										
											2026-04-18 21:38:31 -07:00
+								# providers:
 								#   ollama-local:
 								#     request_timeout_seconds: 300   # Longer timeout for local cold-starts
-												fix(config): add stale timeout settings

											
										
										
											2026-04-19 13:40:09 -06:00
+								#     stale_timeout_seconds: 900     # Explicitly re-enable stale detection on local endpoints
-												feat(providers): add per-provider and per-model request_timeout_seconds config

Adds optional providers.<id>.request_timeout_seconds and
providers.<id>.models.<model>.timeout_seconds config, resolved via a new
hermes_cli/timeouts.py helper and applied where client_kwargs is built
in run_agent.py. Zero default behavior change: when both keys are unset,
the openai SDK default takes over.

Mirrors the existing _get_task_timeout pattern in agent/auxiliary_client.py
for auxiliary tasks - the primary turn path just never got the equivalent
knob.

Cross-project demand: openclaw/openclaw#43946 (17 reactions) asks for
exactly this config - specifically calls out Ollama cold-start hanging
the client.

											
										
										
											2026-04-18 21:38:31 -07:00
+								#   anthropic:
 								#     request_timeout_seconds: 30    # Fast-fail cloud requests
 								#     models:
 								#       claude-opus-4.6:
 								#         timeout_seconds: 600       # Longer timeout for extended-thinking Opus calls
-												fix(config): add stale timeout settings

											
										
										
											2026-04-19 13:40:09 -06:00
+								#   openai-codex:
 								#     models:
 								#       gpt-5.4:
 								#         stale_timeout_seconds: 1800  # Longer non-streaming stale timeout for slow large-context turns
-												fix(compaction): don't halve context_length on output-cap-too-large errors

When the API returns "max_tokens too large given prompt" (input tokens
are within the context window, but input + requested output > window),
the old code incorrectly routed through the same handler as "prompt too
long" errors, calling get_next_probe_tier() and permanently halving
context_length. This made things worse: the window was fine, only the
requested output size needed trimming for that one call.

Two distinct error classes now handled separately:

  Prompt too long  — input itself exceeds context window.
    Fix: compress history + halve context_length (existing behaviour,
    unchanged).

  Output cap too large — input OK, but input + max_tokens > window.
    Fix: parse available_tokens from the error message, set a one-shot
    _ephemeral_max_output_tokens override for the retry, and leave
    context_length completely untouched.

Changes:
- agent/model_metadata.py: add parse_available_output_tokens_from_error()
  that detects Anthropic's "available_tokens: N" error format and returns
  the available output budget, or None for all other error types.
- run_agent.py: call the new parser first in the is_context_length_error
  block; if it fires, set _ephemeral_max_output_tokens (with a 64-token
  safety margin) and break to retry without touching context_length.
  _build_api_kwargs consumes the ephemeral value exactly once then clears
  it so subsequent calls use self.max_tokens normally.
- agent/anthropic_adapter.py: expand build_anthropic_kwargs docstring to
  clearly document the max_tokens (output cap) vs context_length (total
  window) distinction, which is a persistent source of confusion due to
  the OpenAI-inherited "max_tokens" name.
- cli-config.yaml.example: add inline comments explaining both keys side
  by side where users are most likely to look.
- website/docs/integrations/providers.md: add a callout box at the top
  of "Context Length Detection" and clarify the troubleshooting entry.
- tests/test_ctx_halving_fix.py: 24 tests across four classes covering
  the parser, build_anthropic_kwargs clamping, ephemeral one-shot
  consumption, and the invariant that context_length is never mutated
  on output-cap errors.

											
										
										
											2026-04-09 16:54:23 +02:00
-												feat(provider-routing): add OpenRouter provider routing configuration

Introduced a new `provider_routing` section in the CLI configuration to control how requests are routed across providers when using OpenRouter. This includes options for sorting providers by throughput, latency, or price, as well as allowing or ignoring specific providers, setting the order of provider attempts, and managing data collection policies. Updated relevant classes and documentation to support these features, enhancing flexibility in provider selection.

											
										
										
											2026-03-01 18:24:27 -08:00
+								# =============================================================================
 								# OpenRouter Provider Routing (only applies when using OpenRouter)
 								# =============================================================================
 								# Control how requests are routed across providers on OpenRouter.
 								# See: https://openrouter.ai/docs/guides/routing/provider-selection
 								#
 								# provider_routing:
 								#   # Sort strategy: "price" (default), "throughput", or "latency"
 								#   # Append :nitro to model name for a shortcut to throughput sorting.
 								#   sort: "throughput"
 								#
 								#   # Only allow these providers (provider slugs from OpenRouter)
 								#   # only: ["anthropic", "google"]
 								#
 								#   # Skip these providers entirely
 								#   # ignore: ["deepinfra", "fireworks"]
 								#
 								#   # Try providers in this order (overrides default load balancing)
 								#   # order: ["anthropic", "google", "together"]
 								#
 								#   # Require providers to support all parameters in your request
 								#   # require_parameters: true
 								#
 								#   # Data policy: "allow" (default) or "deny" to exclude providers that may store data
 								#   # data_collection: "deny"
-												fix: wire worktree flag into hermes CLI entry point + docs + tests

Critical fixes:
- Add --worktree/-w to hermes_cli/main.py argparse (both chat
  subcommand and top-level parser) so 'hermes -w' works via the
  actual CLI entry point, not just 'python cli.py -w'
- Pass worktree flag through cmd_chat() kwargs to cli_main()
- Handle worktree attr in bare 'hermes' and --resume/--continue paths

Bug fixes in cli.py:
- Skip worktree creation for --list-tools/--list-toolsets (wasteful)
- Wrap git worktree subprocess.run in try/except (crash on timeout)
- Add stale worktree pruning on startup (_prune_stale_worktrees):
  removes clean worktrees older than 24h left by crashed/killed sessions

Documentation updates:
- AGENTS.md: add --worktree to CLI commands table
- cli-config.yaml.example: add worktree config section
- website/docs/reference/cli-commands.md: add to core commands
- website/docs/user-guide/cli.md: add usage examples
- website/docs/user-guide/configuration.md: add config docs

Test improvements (17 → 31 tests):
- Stale worktree pruning (prune old clean, keep recent, keep dirty)
- Directory symlink via .worktreeinclude
- Edge cases (no commits, not a repo, pre-existing .worktrees/)
- CLI flag/config OR logic
- TERMINAL_CWD integration
- System prompt injection format

											
										
										
											2026-03-07 21:05:40 -08:00
+								# =============================================================================
 								# Git Worktree Isolation
 								# =============================================================================
 								# When enabled, each CLI session creates an isolated git worktree so multiple
 								# agents can work on the same repo concurrently without file collisions.
 								# Equivalent to always passing --worktree / -w on the command line.
 								#
 								# worktree: true    # Always create a worktree when in a git repo
 								# worktree: false   # Default — only create when -w flag is passed
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								# =============================================================================
 								# Terminal Tool Configuration
 								# =============================================================================
 								# Choose ONE of the following terminal configurations by uncommenting it.
 								# The terminal tool executes commands in the specified environment.
 								# -----------------------------------------------------------------------------
 								# OPTION 1: Local execution (default)
 								# Commands run directly on your machine in the current directory
 								# -----------------------------------------------------------------------------
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								# Working directory behavior:
 								#   - CLI (`hermes` command): Uses "." (current directory where you run hermes)
 								#   - Messaging (Telegram/Discord): Uses MESSAGING_CWD from .env (default: home)
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								terminal:
-												Update docs to match backend key rename and CWD behavior

- cli-config.yaml.example: env_type → backend everywhere, matching the
  documented config key that hermes_cli/config.py and README already use
- cli-config.yaml.example: added comments clarifying cwd is a path
  INSIDE the target environment for non-local backends
- AGENTS.md: updated terminal.cwd description to explain "." only
  resolves to host CWD for the local backend
- .env.example: updated TERMINAL_CWD comment to warn against using
  host-local paths with remote backends, lists per-backend defaults

											
										
										
											2026-02-16 22:31:41 -08:00
+								  backend: "local"
-												fix(docker): gate cwd workspace mount behind config

Keep Docker sandboxes isolated by default. Add an explicit terminal.docker_mount_cwd_to_workspace opt-in, thread it through terminal/file environment creation, and document the security tradeoff and config.yaml workflow clearly.

											
										
										
											2026-03-16 05:19:43 -07:00
+								  cwd: "."  # For local backend: "." = current directory. Ignored for remote backends unless a backend documents otherwise.
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								  timeout: 180
-												fix(docker): gate cwd workspace mount behind config

Keep Docker sandboxes isolated by default. Add an explicit terminal.docker_mount_cwd_to_workspace opt-in, thread it through terminal/file environment creation, and document the security tradeoff and config.yaml workflow clearly.

											
										
										
											2026-03-16 05:19:43 -07:00
+								  docker_mount_cwd_to_workspace: false  # SECURITY: off by default. Opt in to mount the launch cwd into Docker /workspace.
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								  lifetime_seconds: 300
-												fix: handle empty sudo password and false prompts

											
										
										
											2026-04-07 23:44:12 +02:00
+								  # sudo_password: "hunter2"  # Optional: pipe a sudo password via sudo -S. SECURITY WARNING: plaintext.
 								  # sudo_password: ""         # Explicit empty password: try empty and never open the interactive sudo prompt.
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
 								# -----------------------------------------------------------------------------
 								# OPTION 2: SSH remote execution
 								# Commands run on a remote server - agent code stays local (sandboxed)
 								# Great for: keeping agent isolated from its own code, using powerful remote hardware
 								# -----------------------------------------------------------------------------
 								# terminal:
-												Update docs to match backend key rename and CWD behavior

- cli-config.yaml.example: env_type → backend everywhere, matching the
  documented config key that hermes_cli/config.py and README already use
- cli-config.yaml.example: added comments clarifying cwd is a path
  INSIDE the target environment for non-local backends
- AGENTS.md: updated terminal.cwd description to explain "." only
  resolves to host CWD for the local backend
- .env.example: updated TERMINAL_CWD comment to warn against using
  host-local paths with remote backends, lists per-backend defaults

											
										
										
											2026-02-16 22:31:41 -08:00
+								#   backend: "ssh"
 								#   cwd: "/home/myuser/project"  # Path on the REMOTE server
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								#   timeout: 180
 								#   lifetime_seconds: 300
 								#   ssh_host: "my-server.example.com"
 								#   ssh_user: "myuser"
 								#   ssh_port: 22
 								#   ssh_key: "~/.ssh/id_rsa"  # Optional - uses ssh-agent if not specified
 								# -----------------------------------------------------------------------------
 								# OPTION 3: Docker container
 								# Commands run in an isolated Docker container
 								# Great for: reproducible environments, testing, isolation
 								# -----------------------------------------------------------------------------
 								# terminal:
-												Update docs to match backend key rename and CWD behavior

- cli-config.yaml.example: env_type → backend everywhere, matching the
  documented config key that hermes_cli/config.py and README already use
- cli-config.yaml.example: added comments clarifying cwd is a path
  INSIDE the target environment for non-local backends
- AGENTS.md: updated terminal.cwd description to explain "." only
  resolves to host CWD for the local backend
- .env.example: updated TERMINAL_CWD comment to warn against using
  host-local paths with remote backends, lists per-backend defaults

											
										
										
											2026-02-16 22:31:41 -08:00
+								#   backend: "docker"
 								#   cwd: "/workspace"  # Path INSIDE the container (default: /)
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								#   timeout: 180
 								#   lifetime_seconds: 300
-												Update terminal configuration and enhance CLI model management

- Changed default Docker, Singularity, and Modal images in configuration files to use "nikolaik/python-nodejs:python3.11-nodejs20" for improved compatibility.
- Updated the default model in the configuration to "anthropic/claude-sonnet-4.5" and adjusted related setup prompts for API provider configuration.
- Introduced a new CLI option for selecting a custom OpenAI-compatible endpoint, enhancing flexibility in model provider setup.
- Enhanced the prompt choice functionality to support arrow key navigation for better user experience in CLI interactions.
- Updated documentation in relevant files to reflect these changes and improve user guidance.

											
										
										
											2026-02-02 19:13:41 -08:00
+								#   docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
-												fix(docker): gate cwd workspace mount behind config

Keep Docker sandboxes isolated by default. Add an explicit terminal.docker_mount_cwd_to_workspace opt-in, thread it through terminal/file environment creation, and document the security tradeoff and config.yaml workflow clearly.

											
										
										
											2026-03-16 05:19:43 -07:00
+								#   docker_mount_cwd_to_workspace: true   # Explicit opt-in: mount your launch cwd into /workspace
-												fix(docker): add explicit env allowlist for container credentials (#1436)

Docker terminal sessions are secret-dark by default. This adds
terminal.docker_forward_env as an explicit allowlist for env vars
that may be forwarded into Docker containers.

Values resolve from the current shell first, then fall back to
~/.hermes/.env. Only variables the user explicitly lists are
forwarded — nothing is auto-exposed.

Cherry-picked from PR #1449 by @teknium1, conflict-resolved onto
current main.

Fixes #1436
Supersedes #1439

											
										
										
											2026-03-17 02:34:25 -07:00
+								#   # Optional: explicitly forward selected env vars into Docker.
 								#   # These values come from your current shell first, then ~/.hermes/.env.
 								#   # Warning: anything forwarded here is visible to commands run in the container.
 								#   docker_forward_env:
 								#     - "GITHUB_TOKEN"
 								#     - "NPM_TOKEN"
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
 								# -----------------------------------------------------------------------------
 								# OPTION 4: Singularity/Apptainer container
 								# Commands run in a Singularity container (common in HPC environments)
 								# Great for: HPC clusters, shared compute environments
 								# -----------------------------------------------------------------------------
 								# terminal:
-												Update docs to match backend key rename and CWD behavior

- cli-config.yaml.example: env_type → backend everywhere, matching the
  documented config key that hermes_cli/config.py and README already use
- cli-config.yaml.example: added comments clarifying cwd is a path
  INSIDE the target environment for non-local backends
- AGENTS.md: updated terminal.cwd description to explain "." only
  resolves to host CWD for the local backend
- .env.example: updated TERMINAL_CWD comment to warn against using
  host-local paths with remote backends, lists per-backend defaults

											
										
										
											2026-02-16 22:31:41 -08:00
+								#   backend: "singularity"
 								#   cwd: "/workspace"  # Path INSIDE the container (default: /root)
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								#   timeout: 180
 								#   lifetime_seconds: 300
-												Update terminal configuration and enhance CLI model management

- Changed default Docker, Singularity, and Modal images in configuration files to use "nikolaik/python-nodejs:python3.11-nodejs20" for improved compatibility.
- Updated the default model in the configuration to "anthropic/claude-sonnet-4.5" and adjusted related setup prompts for API provider configuration.
- Introduced a new CLI option for selecting a custom OpenAI-compatible endpoint, enhancing flexibility in model provider setup.
- Enhanced the prompt choice functionality to support arrow key navigation for better user experience in CLI interactions.
- Updated documentation in relevant files to reflect these changes and improve user guidance.

											
										
										
											2026-02-02 19:13:41 -08:00
+								#   singularity_image: "docker://nikolaik/python-nodejs:python3.11-nodejs20"
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
 								# -----------------------------------------------------------------------------
 								# OPTION 5: Modal cloud execution
 								# Commands run on Modal's cloud infrastructure
 								# Great for: GPU access, scalable compute, serverless execution
 								# -----------------------------------------------------------------------------
 								# terminal:
-												Update docs to match backend key rename and CWD behavior

- cli-config.yaml.example: env_type → backend everywhere, matching the
  documented config key that hermes_cli/config.py and README already use
- cli-config.yaml.example: added comments clarifying cwd is a path
  INSIDE the target environment for non-local backends
- AGENTS.md: updated terminal.cwd description to explain "." only
  resolves to host CWD for the local backend
- .env.example: updated TERMINAL_CWD comment to warn against using
  host-local paths with remote backends, lists per-backend defaults

											
										
										
											2026-02-16 22:31:41 -08:00
+								#   backend: "modal"
 								#   cwd: "/workspace"  # Path INSIDE the sandbox (default: /root)
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								#   timeout: 180
 								#   lifetime_seconds: 300
-												Update terminal configuration and enhance CLI model management

- Changed default Docker, Singularity, and Modal images in configuration files to use "nikolaik/python-nodejs:python3.11-nodejs20" for improved compatibility.
- Updated the default model in the configuration to "anthropic/claude-sonnet-4.5" and adjusted related setup prompts for API provider configuration.
- Introduced a new CLI option for selecting a custom OpenAI-compatible endpoint, enhancing flexibility in model provider setup.
- Enhanced the prompt choice functionality to support arrow key navigation for better user experience in CLI interactions.
- Updated documentation in relevant files to reflect these changes and improve user guidance.

											
										
										
											2026-02-02 19:13:41 -08:00
+								#   modal_image: "nikolaik/python-nodejs:python3.11-nodejs20"
-												feat(cli): add Daytona to setup wizard, doctor, and status display

Add Daytona as a backend choice in the interactive setup wizard with
SDK installation and API key prompts. Show Daytona image in status
output and validate API key + SDK in doctor checks. Add OPTION 6
example in cli-config.yaml.example.

Signed-off-by: rovle <lovre.pesut@gmail.com>

											
										
										
											2026-03-05 00:44:39 -08:00
 								# -----------------------------------------------------------------------------
 								# OPTION 6: Daytona cloud execution
 								# Commands run in Daytona cloud sandboxes
 								# Great for: Cloud dev environments, persistent workspaces, team collaboration
 								# Requires: pip install daytona, DAYTONA_API_KEY env var
 								# -----------------------------------------------------------------------------
 								# terminal:
 								#   backend: "daytona"
-												docs(config): add Daytona disk limit hint and fix default cwd in example

Signed-off-by: rovle <lovre.pesut@gmail.com>

											
										
										
											2026-03-05 01:11:55 -08:00
+								#   cwd: "~"
-												feat(cli): add Daytona to setup wizard, doctor, and status display

Add Daytona as a backend choice in the interactive setup wizard with
SDK installation and API key prompts. Show Daytona image in status
output and validate API key + SDK in doctor checks. Add OPTION 6
example in cli-config.yaml.example.

Signed-off-by: rovle <lovre.pesut@gmail.com>

											
										
										
											2026-03-05 00:44:39 -08:00
+								#   timeout: 180
 								#   lifetime_seconds: 300
 								#   daytona_image: "nikolaik/python-nodejs:python3.11-nodejs20"
-												docs(config): add Daytona disk limit hint and fix default cwd in example

Signed-off-by: rovle <lovre.pesut@gmail.com>

											
										
										
											2026-03-05 01:11:55 -08:00
+								#   container_disk: 10240          # Daytona max is 10GB per sandbox
-												feat(cli): add Daytona to setup wizard, doctor, and status display

Add Daytona as a backend choice in the interactive setup wizard with
SDK installation and API key prompts. Show Daytona image in status
output and validate API key + SDK in doctor checks. Add OPTION 6
example in cli-config.yaml.example.

Signed-off-by: rovle <lovre.pesut@gmail.com>

											
										
										
											2026-03-05 00:44:39 -08:00
-												feat: enhance interrupt handling and container resource configuration

- Introduced a shared interrupt signaling mechanism to allow tools to check for user interrupts during long-running operations.
- Updated the AIAgent to handle interrupts more effectively, ensuring in-progress tool calls are canceled and multiple interrupt messages are combined into one prompt.
- Enhanced the CLI configuration to include container resource limits (CPU, memory, disk) and persistence options for Docker, Singularity, and Modal environments.
- Improved documentation to clarify interrupt behaviors and container resource settings, providing users with better guidance on configuration and usage.

											
										
										
											2026-02-23 02:11:33 -08:00
+								#
-												feat(cli): add Daytona to setup wizard, doctor, and status display

Add Daytona as a backend choice in the interactive setup wizard with
SDK installation and API key prompts. Show Daytona image in status
output and validate API key + SDK in doctor checks. Add OPTION 6
example in cli-config.yaml.example.

Signed-off-by: rovle <lovre.pesut@gmail.com>

											
										
										
											2026-03-05 00:44:39 -08:00
+								# --- Container resource limits (docker, singularity, modal, daytona -- ignored for local/ssh) ---
-												feat: enhance interrupt handling and container resource configuration

- Introduced a shared interrupt signaling mechanism to allow tools to check for user interrupts during long-running operations.
- Updated the AIAgent to handle interrupts more effectively, ensuring in-progress tool calls are canceled and multiple interrupt messages are combined into one prompt.
- Enhanced the CLI configuration to include container resource limits (CPU, memory, disk) and persistence options for Docker, Singularity, and Modal environments.
- Improved documentation to clarify interrupt behaviors and container resource settings, providing users with better guidance on configuration and usage.

											
										
										
											2026-02-23 02:11:33 -08:00
+								# These settings apply to all container backends. They control the resources
 								# allocated to the sandbox and whether its filesystem persists across sessions.
-												feat: add container resource configuration prompts in setup wizard

Introduced interactive prompts for configuring container resource settings (CPU, memory, disk, persistence) during the setup wizard. Updated the default configuration to include these settings and improved user guidance on their implications for Docker, Singularity, and Modal backends. This enhancement aims to streamline the setup process and provide users with clearer options for resource management.

											
										
										
											2026-03-04 03:29:05 -08:00
+								  container_cpu: 1              # CPU cores
 								  container_memory: 5120        # Memory in MB (5120 = 5GB)
 								  container_disk: 51200         # Disk in MB (51200 = 50GB)
 								  container_persistent: true    # Persist filesystem across sessions (false = ephemeral)
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
-												Implement sudo support across terminal environments

- Added support for sudo commands in local, Docker, Singularity, and SSH environments by introducing the `SUDO_PASSWORD` environment variable.
- Updated terminal tool configurations in `.env.example` and `cli-config.yaml.example` to document the new sudo functionality.
- Enhanced the command execution process to handle sudo commands gracefully, preventing hangs on interactive prompts and providing clear error messages when no password is configured.
- Updated `README.md` to include instructions for using sudo support and SSH backend configuration.
- Revised `TODO.md` to reflect the completion of the sudo feature and outline future enhancements.

											
										
										
											2026-02-01 10:02:34 -08:00
+								# -----------------------------------------------------------------------------
 								# SUDO SUPPORT (works with ALL backends above)
 								# -----------------------------------------------------------------------------
 								# Add sudo_password to any terminal config above to enable sudo commands.
 								# The password is piped via `sudo -S`. Works with local, ssh, docker, etc.
 								#
 								# SECURITY WARNING: Password stored in plaintext!
 								#
-												fix: handle empty sudo password and false prompts

											
										
										
											2026-04-07 23:44:12 +02:00
+								# INTERACTIVE PROMPT: If sudo_password is unset and the CLI is running,
-												Enhance session logging and interactive sudo support

- Implemented automatic session logging, saving conversation trajectories to the `logs/` directory in JSON format, with each session having a unique identifier.
- Updated the CLI to display the session ID in the welcome banner for easy reference.
- Introduced an interactive sudo password prompt in CLI mode, allowing users to enter their password with a 45-second timeout, enhancing user experience during command execution.
- Documented session logging and interactive sudo features in `README.md`, `cli.md`, and `cli-config.yaml.example` for better user guidance.

											
										
										
											2026-02-01 15:36:26 -08:00
+								# you'll be prompted to enter your password when sudo is needed:
 								# - 45-second timeout (auto-skips if no input)
 								# - Press Enter to skip (command fails gracefully)
 								# - Password is hidden while typing
 								# - Password is cached for the session
 								#
-												fix: handle empty sudo password and false prompts

											
										
										
											2026-04-07 23:44:12 +02:00
+								# EMPTY PASSWORDS: Setting sudo_password to an explicit empty string is different
 								# from leaving it unset. Hermes will try an empty password via `sudo -S` and
 								# will not open the interactive prompt. This is useful for passwordless sudo,
 								# Touch ID sudo setups, and environments where prompting is just noise.
 								#
-												Implement sudo support across terminal environments

- Added support for sudo commands in local, Docker, Singularity, and SSH environments by introducing the `SUDO_PASSWORD` environment variable.
- Updated terminal tool configurations in `.env.example` and `cli-config.yaml.example` to document the new sudo functionality.
- Enhanced the command execution process to handle sudo commands gracefully, preventing hangs on interactive prompts and providing clear error messages when no password is configured.
- Updated `README.md` to include instructions for using sudo support and SSH backend configuration.
- Revised `TODO.md` to reflect the completion of the sudo feature and outline future enhancements.

											
										
										
											2026-02-01 10:02:34 -08:00
+								# ALTERNATIVES:
 								# - SSH backend: Configure passwordless sudo on the remote server
 								# - Containers: Run as root inside the container (no sudo needed)
 								# - Local: Configure /etc/sudoers for specific commands
 								#
 								# Example (add to your terminal section):
 								#   sudo_password: "your-password-here"
 								# =============================================================================
-												feat(security): add tirith pre-exec command scanning

Integrate tirith as a pre-execution security scanner that detects
homograph URLs, pipe-to-interpreter patterns, terminal injection,
zero-width Unicode, and environment variable manipulation — threats
the existing 50-pattern dangerous command detector doesn't cover.

Architecture: gather-then-decide — both tirith and the dangerous
command detector run before any approval prompt, preventing gateway
force=True replay from bypassing one check when only the other was
shown to the user.

New files:
- tools/tirith_security.py: subprocess wrapper with auto-installer,
  mandatory cosign provenance verification, non-blocking background
  download, disk-persistent failure markers with retryable-cause
  tracking (cosign_missing auto-clears when cosign appears on PATH)
- tests/tools/test_tirith_security.py: 62 tests covering exit code
  mapping, fail_open, cosign verification, background install,
  HERMES_HOME isolation, and failure recovery
- tests/tools/test_command_guards.py: 21 integration tests for the
  combined guard orchestration

Modified files:
- tools/approval.py: add check_all_command_guards() orchestrator,
  add allow_permanent parameter to prompt_dangerous_approval()
- tools/terminal_tool.py: replace _check_dangerous_command with
  consolidated check_all_command_guards
- cli.py: update _approval_callback for allow_permanent kwarg,
  call ensure_installed() at startup
- gateway/run.py: iterate pattern_keys list on replay approval,
  call ensure_installed() at startup
- hermes_cli/config.py: add security config defaults, split
  commented sections for independent fallback
- cli-config.yaml.example: document tirith security config

											
										
										
											2026-03-11 14:20:32 +05:30
+								# Security Scanning (tirith)
 								# =============================================================================
 								# Optional pre-exec command security scanning via tirith.
 								# Detects homograph URLs, pipe-to-shell, terminal injection, env manipulation.
 								# Install: brew install sheeki03/tap/tirith
 								# Docs: https://github.com/sheeki03/tirith
 								#
 								# security:
 								#   tirith_enabled: true        # Enable/disable tirith scanning
 								#   tirith_path: "tirith"       # Path to tirith binary (supports ~ expansion)
 								#   tirith_timeout: 5           # Scan timeout in seconds
 								#   tirith_fail_open: true      # Allow commands if tirith unavailable
 								# =============================================================================
-												Implement sudo support across terminal environments

- Added support for sudo commands in local, Docker, Singularity, and SSH environments by introducing the `SUDO_PASSWORD` environment variable.
- Updated terminal tool configurations in `.env.example` and `cli-config.yaml.example` to document the new sudo functionality.
- Enhanced the command execution process to handle sudo commands gracefully, preventing hangs on interactive prompts and providing clear error messages when no password is configured.
- Updated `README.md` to include instructions for using sudo support and SSH backend configuration.
- Revised `TODO.md` to reflect the completion of the sudo feature and outline future enhancements.

											
										
										
											2026-02-01 10:02:34 -08:00
+								# Browser Tool Configuration
 								# =============================================================================
 								browser:
 								  # Inactivity timeout in seconds - browser sessions are automatically closed
 								  # after this period of no activity between agent loops (default: 120 = 2 minutes)
 								  inactivity_timeout: 120
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced the `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
+								# =============================================================================
 								# Context Compression (Auto-shrinks long conversations)
 								# =============================================================================
 								# When a conversation approaches the model's context limit, the middle turns are
 								# automatically summarized to free up space while preserving important context.
 								#
 								# HOW IT WORKS:
 								# 1. Tracks actual token usage from API responses (not estimates)
 								# 2. When prompt_tokens >= threshold × the model's context_length (threshold is a
 								#    fraction, e.g. 0.50 = 50%), triggers compression
 								# 3. Protects first 3 turns (system prompt, initial request, first response)
-												fix(compression): replace dead summary_target_tokens with ratio-based scaling (#2554)

The summary_target_tokens parameter was accepted in the constructor,
stored on the instance, and never used — the summary budget was always
computed from hardcoded module constants (_SUMMARY_RATIO=0.20,
_MAX_SUMMARY_TOKENS=8000). This caused two compounding problems:

1. The config value was silently ignored, giving users no control
   over post-compression size.
2. Fixed budgets (20K tail, 8K summary cap) didn't scale with
   context window size. Switching from a 1M-context model to a
   200K model would trigger compression that nuked 350K tokens
   of conversation history down to ~30K.

Changes:
- Replace summary_target_tokens with summary_target_ratio (default 0.40)
  which sets the post-compression target as a fraction of context_length.
  Tail token budget and summary cap now scale proportionally:
    MiniMax 200K → ~80K post-compression
    GPT-5   1M  → ~400K post-compression
- Change threshold_percent default: 0.50 → 0.80 (don't fire until
  80% of context is consumed)
- Change protect_last_n default: 4 → 20 (preserve ~10 full turns)
- Summary token cap scales to 5% of context (was fixed 8K), capped
  at 32K ceiling
- Read target_ratio and protect_last_n from config.yaml compression
  section (both are now configurable)
- Remove hardcoded summary_target_tokens=500 from run_agent.py
- Add 5 new tests for ratio scaling, clamping, and new defaults
											
										
										
											2026-03-24 17:45:49 -07:00
+								# 4. Protects last N messages (default 20 messages = ~10 full turns of recent context)
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced the `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
+								# 5. Summarizes middle turns using a fast/cheap model
 								# 6. Inserts summary as a user message, continues conversation seamlessly
 								#
-												fix(compression): restore sane defaults and cap summary at 12K tokens

- threshold: 0.80 → 0.50 (compress at 50%, not 80%)
- target_ratio: 0.40 → 0.20, now relative to threshold not total context
  (20% of 50% = 10% of context as tail budget)
- summary ceiling: 32K → 12K (Gemini can't output more than ~12K)
- Updated DEFAULT_CONFIG, config display, example config, and tests

											
										
										
											2026-03-24 18:48:04 -07:00
+								# Post-compression tail budget is target_ratio × threshold × context_length:
 								#   200K context, threshold 0.50, ratio 0.20 → 20K tokens of recent tail preserved
 								#   1M   context, threshold 0.50, ratio 0.20 → 100K tokens of recent tail preserved
-												fix(compression): replace dead summary_target_tokens with ratio-based scaling (#2554)

The summary_target_tokens parameter was accepted in the constructor,
stored on the instance, and never used — the summary budget was always
computed from hardcoded module constants (_SUMMARY_RATIO=0.20,
_MAX_SUMMARY_TOKENS=8000). This caused two compounding problems:

1. The config value was silently ignored, giving users no control
   over post-compression size.
2. Fixed budgets (20K tail, 8K summary cap) didn't scale with
   context window size. Switching from a 1M-context model to a
   200K model would trigger compression that nuked 350K tokens
   of conversation history down to ~30K.

Changes:
- Replace summary_target_tokens with summary_target_ratio (default 0.40)
  which sets the post-compression target as a fraction of context_length.
  Tail token budget and summary cap now scale proportionally:
    MiniMax 200K → ~80K post-compression
    GPT-5   1M  → ~400K post-compression
- Change threshold_percent default: 0.50 → 0.80 (don't fire until
  80% of context is consumed)
- Change protect_last_n default: 4 → 20 (preserve ~10 full turns)
- Summary token cap scales to 5% of context (was fixed 8K), capped
  at 32K ceiling
- Read target_ratio and protect_last_n from config.yaml compression
  section (both are now configurable)
- Remove hardcoded summary_target_tokens=500 from run_agent.py
- Add 5 new tests for ratio scaling, clamping, and new defaults
											
										
										
											2026-03-24 17:45:49 -07:00
+								#
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced the `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
+								compression:
 								  # Enable automatic context compression (default: true)
 								  # Set to false if you prefer to manage context manually or want errors on overflow
 								  enabled: true
-												fix(compression): restore sane defaults and cap summary at 12K tokens

- threshold: 0.80 → 0.50 (compress at 50%, not 80%)
- target_ratio: 0.40 → 0.20, now relative to threshold not total context
  (20% of 50% = 10% of context as tail budget)
- summary ceiling: 32K → 12K (Gemini can't output more than ~12K)
- Updated DEFAULT_CONFIG, config display, example config, and tests

											
										
										
											2026-03-24 18:48:04 -07:00
+								  # Trigger compression at this fraction of the model's context limit (default: 0.50 = 50%)
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced the `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
+								  # Lower values = more aggressive compression, higher values = compress later
-												fix(compression): restore sane defaults and cap summary at 12K tokens

- threshold: 0.80 → 0.50 (compress at 50%, not 80%)
- target_ratio: 0.40 → 0.20, now relative to threshold not total context
  (20% of 50% = 10% of context as tail budget)
- summary ceiling: 32K → 12K (Gemini can't output more than ~12K)
- Updated DEFAULT_CONFIG, config display, example config, and tests

											
										
										
											2026-03-24 18:48:04 -07:00
+								  threshold: 0.50
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced the `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
-												fix(compression): restore sane defaults and cap summary at 12K tokens

- threshold: 0.80 → 0.50 (compress at 50%, not 80%)
- target_ratio: 0.40 → 0.20, now relative to threshold not total context
  (20% of 50% = 10% of context as tail budget)
- summary ceiling: 32K → 12K (Gemini can't output more than ~12K)
- Updated DEFAULT_CONFIG, config display, example config, and tests

											
										
										
											2026-03-24 18:48:04 -07:00
+								  # Fraction of the threshold to preserve as recent tail (default: 0.20 = 20%)
 								  # e.g. 20% of 50% threshold = 10% of total context kept as recent messages.
 								  # Summary output is separately capped at 12K tokens (Gemini output limit).
 								  # Range: 0.10 - 0.80
 								  target_ratio: 0.20
-												fix(compression): replace dead summary_target_tokens with ratio-based scaling (#2554)

The summary_target_tokens parameter was accepted in the constructor,
stored on the instance, and never used — the summary budget was always
computed from hardcoded module constants (_SUMMARY_RATIO=0.20,
_MAX_SUMMARY_TOKENS=8000). This caused two compounding problems:

1. The config value was silently ignored, giving users no control
   over post-compression size.
2. Fixed budgets (20K tail, 8K summary cap) didn't scale with
   context window size. Switching from a 1M-context model to a
   200K model would trigger compression that nuked 350K tokens
   of conversation history down to ~30K.

Changes:
- Replace summary_target_tokens with summary_target_ratio (default 0.40)
  which sets the post-compression target as a fraction of context_length.
  Tail token budget and summary cap now scale proportionally:
    MiniMax 200K → ~80K post-compression
    GPT-5   1M  → ~400K post-compression
- Change threshold_percent default: 0.50 → 0.80 (don't fire until
  80% of context is consumed)
- Change protect_last_n default: 4 → 20 (preserve ~10 full turns)
- Summary token cap scales to 5% of context (was fixed 8K), capped
  at 32K ceiling
- Read target_ratio and protect_last_n from config.yaml compression
  section (both are now configurable)
- Remove hardcoded summary_target_tokens=500 from run_agent.py
- Add 5 new tests for ratio scaling, clamping, and new defaults
											
										
										
											2026-03-24 17:45:49 -07:00
 								  # Number of most-recent messages to always preserve (default: 20 ≈ 10 full turns)
 								  # Higher values keep more recent conversation intact at the cost of more aggressive
 								  # compression of older turns.
 								  protect_last_n: 20
-												fix: remove legacy compression.summary_* config and env var fallbacks (#8992)

Remove the backward-compat code paths that read compression provider/model
settings from legacy config keys and env vars, which caused silent failures
when auto-detection resolved to incompatible backends.

What changed:
- Remove compression.summary_model, summary_provider, summary_base_url from
  DEFAULT_CONFIG and cli.py defaults
- Remove backward-compat block in _resolve_task_provider_model() that read
  from the legacy compression section
- Remove _get_auxiliary_provider() and _get_auxiliary_env_override() helper
  functions (AUXILIARY_*/CONTEXT_* env var readers)
- Remove env var fallback chain for per-task overrides
- Update hermes config show to read from auxiliary.compression
- Add config migration (v16→17) that moves non-empty legacy values to
  auxiliary.compression and strips the old keys
- Update example config and openclaw migration script
- Remove/update tests for deleted code paths

Compression model/provider is now configured exclusively via:
  auxiliary.compression.provider / auxiliary.compression.model

Closes #8923
											
										
										
											2026-04-13 04:59:26 -07:00
+								  # To pin a specific model/provider for compression summaries, use the
 								  # auxiliary section below (auxiliary.compression.provider / model).
-												feat: enhance auxiliary model configuration and environment variable handling

- Added support for auxiliary model overrides in the configuration, allowing users to specify providers and models for vision and web extraction tasks.
- Updated the CLI configuration example to include new auxiliary model settings.
- Enhanced the environment variable mapping in the CLI to accommodate auxiliary model configurations.
- Improved the resolution logic for auxiliary clients to support task-specific provider overrides.
- Updated relevant documentation and comments for clarity on the new features and their usage.

											
										
										
											2026-03-07 08:52:06 -08:00
-												docs: document prompt_caching.cache_ttl in cli-config example

Made-with: Cursor

											
										
										
											2026-04-19 13:46:45 -05:00
+								# =============================================================================
 								# Anthropic prompt caching TTL
 								# =============================================================================
 								# When prompt caching is active (Claude via OpenRouter or native Anthropic),
 								# Anthropic supports two TTL tiers for cached prefixes: "5m" (default) and
 								# "1h". Other values are ignored and "5m" is used.
 								#
 								prompt_caching:
 								  cache_ttl: "5m" # use "1h" for long sessions with pauses between turns
-												feat: enhance auxiliary model configuration and environment variable handling

- Added support for auxiliary model overrides in the configuration, allowing users to specify providers and models for vision and web extraction tasks.
- Updated the CLI configuration example to include new auxiliary model settings.
- Enhanced the environment variable mapping in the CLI to accommodate auxiliary model configurations.
- Improved the resolution logic for auxiliary clients to support task-specific provider overrides.
- Updated relevant documentation and comments for clarity on the new features and their usage.

											
										
										
											2026-03-07 08:52:06 -08:00
+								# =============================================================================
 								# Auxiliary Models (Advanced — Experimental)
 								# =============================================================================
 								# Hermes uses lightweight "auxiliary" models for side tasks: image analysis,
 								# browser screenshot analysis, web page summarization, and context compression.
 								#
 								# By default these use Gemini Flash via OpenRouter or Nous Portal and are
 								# auto-detected from your credentials.  You do NOT need to change anything
 								# here for normal usage.
 								#
 								# WARNING: Overriding these with providers other than OpenRouter or Nous Portal
 								# is EXPERIMENTAL and may not work.  Not all models/providers support vision,
 								# produce usable summaries, or accept the same API format.  Change at your own
 								# risk — if things break, reset to "auto" / empty values.
 								#
 								# Each task has its own provider + model pair so you can mix providers.
 								# For example: OpenRouter for vision (needs multimodal), but your main
 								# local endpoint for compression (just needs text).
 								#
 								# Provider options:
 								#   "auto"       - Best available: OpenRouter → Nous Portal → main endpoint (default)
 								#   "openrouter" - Force OpenRouter (requires: OPENROUTER_API_KEY)
 								#   "nous"       - Force Nous Portal (requires: hermes login)
-												feat(providers): add Google AI Studio (Gemini) as a first-class provider

Cherry-picked from PR #5494 by kshitijk4poor.
Adds native Gemini support via Google's OpenAI-compatible endpoint.
Zero new dependencies.

											
										
										
											2026-04-06 10:14:01 -07:00
+								#   "gemini"      - Force Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
-												feat: add Ollama Cloud as built-in provider

Add ollama-cloud as a first-class provider with full parity to existing
API-key providers (gemini, zai, minimax, etc.):

- PROVIDER_REGISTRY entry with OLLAMA_API_KEY env var
- Provider aliases: ollama -> custom (local), ollama_cloud -> ollama-cloud
- models.dev integration for accurate context lengths
- URL-to-provider mapping (ollama.com -> ollama-cloud)
- Passthrough model normalization (preserves Ollama model:tag format)
- Default auxiliary model (nemotron-3-nano:30b)
- HermesOverlay in providers.py
- CLI --provider choices, CANONICAL_PROVIDERS entry
- Dynamic model discovery with disk caching (1hr TTL)
- 37 provider-specific tests

Cherry-picked from PR #6038 by kshitijk4poor. Closes #3926

											
										
										
											2026-04-15 22:32:05 -07:00
+								#   "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY)
-												feat(providers): add Google AI Studio (Gemini) as a first-class provider

Cherry-picked from PR #5494 by kshitijk4poor.
Adds native Gemini support via Google's OpenAI-compatible endpoint.
Zero new dependencies.

											
										
										
											2026-04-06 10:14:01 -07:00
+								#   "codex"       - Force Codex OAuth (requires: hermes model → Codex).
-												refactor: remove redundant 'openai' auxiliary provider, clean up docs

The 'openai' provider was redundant — using OPENAI_BASE_URL +
OPENAI_API_KEY with provider: 'main' already covers direct OpenAI API.

Provider options are now: auto, openrouter, nous, codex, main.

- Removed _try_openai(), _OPENAI_AUX_MODEL, _OPENAI_BASE_URL
- Replaced openai tests with codex provider tests
- Updated all docs to remove 'openai' option and clarify 'main'
- 'main' description now explicitly mentions it works with OpenAI API,
  local models, and any OpenAI-compatible endpoint

Tests: 2467 passed.

											
										
										
											2026-03-08 18:50:26 -07:00
+								#                  Uses gpt-5.3-codex which supports vision.
 								#   "main"       - Use your custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY).
 								#                  Works with OpenAI API, local models, or any OpenAI-compatible
 								#                  endpoint.  Also falls back to Codex OAuth and API-key providers.
-												feat: enhance auxiliary model configuration and environment variable handling

- Added support for auxiliary model overrides in the configuration, allowing users to specify providers and models for vision and web extraction tasks.
- Updated the CLI configuration example to include new auxiliary model settings.
- Enhanced the environment variable mapping in the CLI to accommodate auxiliary model configurations.
- Improved the resolution logic for auxiliary clients to support task-specific provider overrides.
- Updated relevant documentation and comments for clarity on the new features and their usage.

											
										
										
											2026-03-07 08:52:06 -08:00
+								#
 								# Model: leave empty to use the provider's default.  When empty, OpenRouter
 								# uses "google/gemini-3-flash-preview" and Nous uses "gemini-3-flash".
 								# Other providers pick a sensible default automatically.
 								#
 								# auxiliary:
 								#   # Image analysis: vision_analyze tool + browser screenshots
 								#   vision:
 								#     provider: "auto"
 								#     model: ""              # e.g. "google/gemini-2.5-flash", "openai/gpt-4o"
-												fix: background task media delivery + vision download timeout (#3919)

* feat(telegram): add webhook mode as alternative to polling

When TELEGRAM_WEBHOOK_URL is set, the adapter starts an HTTP webhook
server (via python-telegram-bot's start_webhook()) instead of long
polling. This enables cloud platforms like Fly.io and Railway to
auto-wake suspended machines on inbound HTTP traffic.

Polling remains the default — no behavior change unless the env var
is set.

Env vars:
  TELEGRAM_WEBHOOK_URL    Public HTTPS URL for Telegram to push to
  TELEGRAM_WEBHOOK_PORT   Local listen port (default 8443)
  TELEGRAM_WEBHOOK_SECRET Secret token for update verification

Cherry-picked and adapted from PR #2022 by SHL0MS. Preserved all
current main enhancements (network error recovery, polling conflict
detection, DM topics setup).

Co-authored-by: SHL0MS <SHL0MS@users.noreply.github.com>

* fix: send_document call in background task delivery + vision download timeout

Two fixes salvaged from PR #2269 by amethystani:

1. gateway/run.py: adapter.send_file() → adapter.send_document()
   send_file() doesn't exist on BasePlatformAdapter. Background task
   media files were silently never delivered (AttributeError swallowed
   by except Exception: pass).

2. tools/vision_tools.py: configurable image download timeout via
   HERMES_VISION_DOWNLOAD_TIMEOUT env var (default 30s), plus guard
   against raise None when max_retries=0.

The third fix in #2269 (opencode-go auth config) was already resolved
on main.

Co-authored-by: amethystani <amethystani@users.noreply.github.com>

---------

Co-authored-by: SHL0MS <SHL0MS@users.noreply.github.com>
Co-authored-by: amethystani <amethystani@users.noreply.github.com>
											
										
										
											2026-03-30 02:59:39 -07:00
+								#     timeout: 30            # LLM API call timeout (seconds)
 								#     download_timeout: 30   # Image HTTP download timeout (seconds)
 								#                            # Increase for slow connections or self-hosted image servers
-												feat: enhance auxiliary model configuration and environment variable handling

- Added support for auxiliary model overrides in the configuration, allowing users to specify providers and models for vision and web extraction tasks.
- Updated the CLI configuration example to include new auxiliary model settings.
- Enhanced the environment variable mapping in the CLI to accommodate auxiliary model configurations.
- Improved the resolution logic for auxiliary clients to support task-specific provider overrides.
- Updated relevant documentation and comments for clarity on the new features and their usage.

											
										
										
											2026-03-07 08:52:06 -08:00
+								#
 								#   # Web page scraping / summarization + browser page text extraction
 								#   web_extract:
 								#     provider: "auto"
 								#     model: ""
-												docs(config): document session_search auxiliary controls

											
										
										
											2026-04-19 13:11:22 -06:00
+								#
 								#   # Session search — summarizes matching past sessions
 								#   session_search:
 								#     provider: "auto"
 								#     model: ""
 								#     timeout: 30
 								#     max_concurrency: 3    # Limit parallel summaries to reduce request-burst 429s
 								#     extra_body: {}        # Provider-specific OpenAI-compatible request fields
 								#                           # Example for providers that support request-body
 								#                           # reasoning controls:
 								#                           # extra_body:
 								#                           #   enable_thinking: false
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced the `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								# =============================================================================
 								# Persistent Memory
 								# =============================================================================
 								# Bounded curated memory injected into the system prompt every session.
 								# Two stores: MEMORY.md (agent's notes) and USER.md (user profile).
 								# Character limits keep the memory small and focused. The agent manages
 								# pruning -- when at the limit, it must consolidate or replace entries.
 								# Disabled by default in batch_runner and RL environments.
 								#
 								memory:
 								  # Agent's personal notes: environment facts, conventions, things learned
 								  memory_enabled: true
 								  # User profile: preferences, communication style, expectations
 								  user_profile_enabled: true
 								  # Character limits (~2.75 chars per token, model-independent)
 								  memory_char_limit: 2200   # ~800 tokens
 								  user_char_limit: 1375     # ~500 tokens
-												feat: enhance memory management features in AIAgent and CLI

- Added configuration options for memory nudge interval and flush minimum turns in cli-config.yaml.example.
- Implemented memory flushing before conversation reset, clearing, and exit in the CLI to ensure memories are saved.
- Introduced a flush_memories method in AIAgent to handle memory persistence before context loss.
- Added periodic nudges to remind the agent to consider saving memories based on user interactions.

											
										
										
											2026-02-22 10:15:17 -08:00
+								  # Periodic memory nudge: remind the agent to consider saving memories
 								  # every N user turns. Set to 0 to disable. Only active when memory is enabled.
 								  nudge_interval: 10        # Nudge every 10 user turns (0 = disabled)
 								  # Memory flush: give the agent one turn to save memories before context is
 								  # lost (compression, /new, /reset, exit). Set to 0 to disable.
 								  # For exit/reset, only fires if the session had at least this many user turns.
 								  flush_min_turns: 6        # Min user turns to trigger flush on exit/reset (0 = disabled)
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
+								# =============================================================================
 								# Session Reset Policy (Messaging Platforms)
 								# =============================================================================
 								# Controls when messaging sessions (Telegram, Discord, WhatsApp, Slack) are
 								# automatically cleared. Without resets, conversation context grows indefinitely,
 								# which increases API costs with every message.
 								#
 								# When a reset triggers, the agent first saves important information to its
 								# persistent memory — but the conversation context is wiped. The agent starts
 								# fresh but retains learned facts via its memory system.
 								#
 								# Users can always manually reset with /reset or /new in chat.
 								#
 								# Modes:
 								#   "both"  - Reset on EITHER inactivity timeout or daily boundary (recommended)
 								#   "idle"  - Reset only after N minutes of inactivity
 								#   "daily" - Reset only at a fixed hour each day
 								#   "none"  - Never auto-reset; context lives until /reset or compression kicks in
 								#
 								# When a reset triggers, the agent gets one turn to save important memories and
 								# skills before the context is wiped. Persistent memory carries across sessions.
 								#
 								session_reset:
 								  mode: both           # "both", "idle", "daily", or "none"
 								  idle_minutes: 1440   # Inactivity timeout in minutes (default: 1440 = 24 hours)
 								  at_hour: 4           # Daily reset hour, 0-23 local time (default: 4 AM)
-												fix(gateway): make group session isolation configurable

default group and channel sessions to per-user isolation, allow opting back into shared room sessions via config.yaml, and document Discord gateway routing and session behavior.

											
										
										
											2026-03-16 00:22:23 -07:00
+								# When true, group/channel chats use one session per participant when the platform
 								# provides a user ID. This is the secure default and prevents users in the same
 								# room from sharing context, interrupts, and token costs. Set false only if you
 								# explicitly want one shared "room brain" per group/channel.
 								group_sessions_per_user: true
-												docs: add streaming config to cli-config.yaml.example and defaults

Documents the new streaming options in the example config:
- display.streaming for CLI (under display section)
- streaming.enabled + transport/interval/threshold/cursor for gateway
- Added streaming: false to load_cli_config() defaults dict

											
										
										
											2026-03-16 07:53:08 -07:00
+								# ─────────────────────────────────────────────────────────────────────────────
 								# Gateway Streaming
 								# ─────────────────────────────────────────────────────────────────────────────
 								# Stream tokens to messaging platforms in real-time. The bot sends a message
 								# on first token, then progressively edits it as more tokens arrive.
 								# Disabled by default — enable to try the streaming UX on Telegram/Discord/Slack.
 								streaming:
 								  enabled: false
 								  # transport: edit           # "edit" = progressive editMessageText
 								  # edit_interval: 0.3        # seconds between message edits
 								  # buffer_threshold: 40      # chars before forcing an edit flush
 								  # cursor: " ▉"              # cursor shown during streaming
-												feat: introduce skills management features in AIAgent and CLI

- Added skills configuration options in cli-config.yaml.example, including a nudge interval for skill creation reminders.
- Implemented skills guidance in AIAgent to prompt users to save reusable workflows after complex tasks.
- Enhanced skills indexing in the prompt builder to include descriptions from SKILL.md files for better context.
- Updated the agent's behavior to periodically remind users about potential skills during tool-calling iterations.

											
										
										
											2026-02-22 13:28:13 -08:00
+								# =============================================================================
 								# Skills Configuration
 								# =============================================================================
 								# Skills are reusable procedures the agent can load and follow. The agent can
 								# also create new skills after completing complex tasks.
 								#
 								skills:
 								  # Nudge the agent to create skills after complex tasks.
 								  # Every N tool-calling iterations, remind the model to consider saving a skill.
 								  # Set to 0 to disable.
 								  creation_nudge_interval: 15
-												feat(skills): support external skill directories via config (#3678)

Add skills.external_dirs config option — a list of additional directories
to scan for skills alongside ~/.hermes/skills/. External dirs are read-only:
skill creation/editing always writes to the local dir. Local skills take
precedence when names collide.

This lets users share skills across tools/agents without copying them into
Hermes's own directory (e.g. ~/.agents/skills, /shared/team-skills).

Changes:
- agent/skill_utils.py: add get_external_skills_dirs() and get_all_skills_dirs()
- agent/prompt_builder.py: scan external dirs in build_skills_system_prompt()
- tools/skills_tool.py: _find_all_skills() and skill_view() search external dirs;
  security check recognizes configured external dirs as trusted
- agent/skill_commands.py: /skill slash commands discover external skills
- hermes_cli/config.py: add skills.external_dirs to DEFAULT_CONFIG
- cli-config.yaml.example: document the option
- tests/agent/test_external_skills.py: 11 tests covering discovery, precedence,
  deduplication, and skill_view for external skills

Requested by community member primco.
											
										
										
											2026-03-29 00:33:30 -07:00
+								  # External skill directories — share skills across tools/agents without
 								  # copying them into ~/.hermes/skills/.  Each path is expanded (~ and ${VAR})
 								  # and resolved to an absolute path.  External dirs are read-only: skill
 								  # creation always writes to ~/.hermes/skills/.  Local skills take precedence
 								  # when names collide.
 								  # external_dirs:
 								  #   - ~/.agents/skills
 								  #   - /home/shared/team-skills
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								# =============================================================================
 								# Agent Behavior
 								# =============================================================================
 								agent:
-												Update agent configuration for maximum tool-calling iterations

- Increased the default maximum tool-calling iterations from 20 to 60 in the CLI configuration and related files, allowing for more complex tasks.
- Updated documentation and comments to reflect the new recommended range for iterations, enhancing user guidance.
- Implemented backward compatibility for loading max iterations from the root-level configuration, ensuring a smooth transition for existing users.
- Adjusted the setup wizard to prompt for the maximum iterations setting, improving user experience during configuration.

											
										
										
											2026-02-03 14:48:19 -08:00
+								  # Maximum tool-calling iterations per conversation
 								  # Higher = more room for complex tasks, but costs more tokens
 								  # Recommended: 20-30 for focused tasks, 50-100 for open exploration
 								  max_turns: 60
-												fix(gateway): add staged inactivity warning before timeout escalation

Introduce gateway_timeout_warning (default 900s) as a pre-timeout alert
layer.  When inactivity reaches the warning threshold, a single
notification is sent to the user offering to wait or reset.  If
inactivity continues to the gateway_timeout (default 1800s), the full
timeout fires as before.

This gives users a chance to intervene before work is lost on slow
API providers without disabling the safety timeout entirely.

Config: agent.gateway_timeout_warning in config.yaml, or
HERMES_AGENT_TIMEOUT_WARNING env var (0 = disable warning).

											
										
										
											2026-04-08 21:39:27 +02:00
 								  # Inactivity timeout for gateway agent runs (seconds, 0 = unlimited).
 								  # The agent can run indefinitely when actively calling tools or receiving
 								  # API responses.  Only fires after the agent has been idle for this duration.
 								  # gateway_timeout: 1800
 								  # Staged warning: send a warning before escalating to full timeout.
 								  # Fires once per run when inactivity reaches this threshold (seconds).
 								  # Set to 0 to disable the warning.
 								  # gateway_timeout_warning: 900
-												fix(gateway): drain in-flight work before restart

											
										
										
											2026-04-10 10:19:17 -07:00
 								  # Graceful drain timeout for gateway stop/restart (seconds).
 								  # The gateway stops accepting new work, waits for in-flight agents to
 								  # finish, then interrupts anything still running after this timeout.
 								  # 0 = no drain, interrupt immediately.
 								  # restart_drain_timeout: 60
-												feat(agent): make API retry count configurable via agent.api_max_retries (#14730)

Closes #11616.

The agent's API retry loop hardcoded max_retries = 3, so users with
fallback providers on flaky primaries burned through ~3 × provider
timeout (e.g. 3 × 180s = 9 minutes) before their fallback chain got a
chance to kick in.

Expose a new config key:

    agent:
      api_max_retries: 3  # default unchanged

Set it to 1 for fast failover when you have fallback providers, or
raise it if you prefer longer tolerance on a single provider. Values
< 1 are clamped to 1 (single attempt, no retry); non-integer values
fall back to the default.

This wraps the Hermes-level retry loop only — the OpenAI SDK's own
low-level retries (max_retries=2 default) still run beneath this for
transient network errors.

Changes:
- hermes_cli/config.py: add agent.api_max_retries default 3 with comment.
- run_agent.py: read self._api_max_retries in AIAgent.__init__; replace
  hardcoded max_retries = 3 in the retry loop with self._api_max_retries.
- cli-config.yaml.example: documented example entry.
- hermes_cli/tips.py: discoverable tip line.
- tests/run_agent/test_api_max_retries_config.py: 4 tests covering
  default, override, clamp-to-one, and invalid-value fallback.
											
										
										
											2026-04-23 13:59:32 -07:00
 								  # Max app-level retry attempts for API errors (connection drops, provider
 								  # timeouts, 5xx, etc.) before the agent surfaces the failure. Lower this
 								  # to 1 if you use fallback providers and want fast failover on flaky
 								  # primaries (default 3). The OpenAI SDK does its own low-level retries
 								  # underneath this wrapper — this is the Hermes-level loop.
 								  # api_max_retries: 3
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
 								  # Enable verbose logging
 								  verbose: false
-												feat: add reasoning effort configuration for agent

- Introduced a new configuration option for reasoning effort in the CLI, allowing users to specify the level of reasoning the agent should perform before responding.
- Updated the CLI and agent initialization to incorporate the reasoning configuration, enhancing the agent's responsiveness and adaptability.
- Implemented logic to load reasoning effort from environment variables and configuration files, providing flexibility in agent behavior.
- Enhanced the documentation in the example configuration file to clarify the new reasoning effort options available.

											
										
										
											2026-02-24 03:30:19 -08:00
+								  # Reasoning effort level (OpenRouter and Nous Portal)
 								  # Controls how much "thinking" the model does before responding.
 								  # Options: "xhigh" (max), "high", "medium", "low", "minimal", "none" (disable)
-												feat: default reasoning effort from xhigh to medium

Reduces token usage and latency for most tasks by defaulting to
medium reasoning effort instead of xhigh. Users can still override
via config or CLI flag. Updates code, tests, example config, and docs.

											
										
										
											2026-03-07 10:14:19 -08:00
+								  reasoning_effort: "medium"
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
 								  # Predefined personalities (use with /personality command)
 								  personalities:
 								    helpful: "You are a helpful, friendly AI assistant."
 								    concise: "You are a concise assistant. Keep responses brief and to the point."
 								    technical: "You are a technical expert. Provide detailed, accurate technical information."
 								    creative: "You are a creative assistant. Think outside the box and offer innovative solutions."
 								    teacher: "You are a patient teacher. Explain concepts clearly with examples."
 								    kawaii: "You are a kawaii assistant! Use cute expressions like (◕‿◕), ★, ♪, and ~! Add sparkles and be super enthusiastic about everything! Every response should feel warm and adorable desu~! ヽ(>∀<☆)ノ"
 								    catgirl: "You are Neko-chan, an anime catgirl AI assistant, nya~! Add 'nya' and cat-like expressions to your speech. Use kaomoji like (=^･ω･^=) and ฅ^•ﻌ•^ฅ. Be playful and curious like a cat, nya~!"
 								    pirate: "Arrr! Ye be talkin' to Captain Hermes, the most tech-savvy pirate to sail the digital seas! Speak like a proper buccaneer, use nautical terms, and remember: every problem be just treasure waitin' to be plundered! Yo ho ho!"
 								    shakespeare: "Hark! Thou speakest with an assistant most versed in the bardic arts. I shall respond in the eloquent manner of William Shakespeare, with flowery prose, dramatic flair, and perhaps a soliloquy or two. What light through yonder terminal breaks?"
 								    surfer: "Duuude! You're chatting with the chillest AI on the web, bro! Everything's gonna be totally rad. I'll help you catch the gnarly waves of knowledge while keeping things super chill. Cowabunga! 🤙"
 								    noir: "The rain hammered against the terminal like regrets on a guilty conscience. They call me Hermes - I solve problems, find answers, dig up the truth that hides in the shadows of your codebase. In this city of silicon and secrets, everyone's got something to hide. What's your story, pal?"
 								    uwu: "hewwo! i'm your fwiendwy assistant uwu~ i wiww twy my best to hewp you! *nuzzles your code* OwO what's this? wet me take a wook! i pwomise to be vewy hewpful >w<"
 								    philosopher: "Greetings, seeker of wisdom. I am an assistant who contemplates the deeper meaning behind every query. Let us examine not just the 'how' but the 'why' of your questions. Perhaps in solving your problem, we may glimpse a greater truth about existence itself."
 								    hype: "YOOO LET'S GOOOO!!! 🔥🔥🔥 I am SO PUMPED to help you today! Every question is AMAZING and we're gonna CRUSH IT together! This is gonna be LEGENDARY! ARE YOU READY?! LET'S DO THIS! 💪😤🚀"
 								# =============================================================================
 								# Toolsets
 								# =============================================================================
 								# Control which tools the agent has access to.
-												chore: remove dead top-level toolsets config key

The top-level 'toolsets' key in config.yaml was never read at runtime.
Tool selection uses platform_toolsets (per-platform) or the --toolsets
CLI flag. The key existed in load_cli_config() defaults and the example
config as 'toolsets: [all]', misleading users into thinking it
controlled tool availability.

- Remove from load_cli_config() hardcoded defaults
- Remove from hermes config show output
- Replace in cli-config.yaml.example with deprecation note pointing
  to platform_toolsets and hermes tools

											
										
										
											2026-03-20 22:27:13 -07:00
+								# Use `hermes tools` to interactively enable/disable tools per platform.
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
-												Enhance platform toolset configuration and CLI toolset handling

- Introduced a new configuration section in `cli-config.yaml.example` for defining platform-specific toolsets, allowing for greater customization of available tools per platform.
- Updated the CLI to check for user-defined toolsets in the configuration, falling back to the default `hermes-cli` toolset if none are specified.
- Enhanced the `GatewayRunner` class to load platform-specific toolsets from the configuration, ensuring that the correct tools are enabled based on the platform being used.

											
										
										
											2026-02-17 23:39:24 -08:00
+								# =============================================================================
 								# Platform Toolsets (per-platform tool configuration)
 								# =============================================================================
 								# Override which toolsets are available on each platform.
 								# If a platform isn't listed here, its built-in default is used.
 								#
 								# You can use EITHER:
 								#   - A preset like "hermes-cli" or "hermes-telegram" (curated tool set)
 								#   - A list of individual toolsets to compose your own (see list below)
 								#
-												feat(gateway): unify QQBot branding, add PLATFORM_HINTS, fix streaming, restore missing setup functions

- Rename platform from 'qq' to 'qqbot' across all integration points
  (Platform enum, toolset, config keys, import paths, file rename qq.py → qqbot.py)
- Add PLATFORM_HINTS for QQBot in prompt_builder (QQ supports markdown)
- Set SUPPORTS_MESSAGE_EDITING = False to skip streaming on QQ
  (prevents duplicate messages from non-editable partial + final sends)
- Add _send_qqbot() standalone send function for cron/send_message tool
- Add interactive _setup_qq() wizard in hermes_cli/setup.py
- Restore missing _setup_signal/email/sms/dingtalk/feishu/wecom/wecom_callback
  functions that were lost during the original merge

											
										
										
											2026-04-14 01:33:06 +08:00
+								# Supported platform keys: cli, telegram, discord, whatsapp, slack, signal, homeassistant, qqbot
-												Enhance platform toolset configuration and CLI toolset handling

- Introduced a new configuration section in `cli-config.yaml.example` for defining platform-specific toolsets, allowing for greater customization of available tools per platform.
- Updated the CLI to check for user-defined toolsets in the configuration, falling back to the default `hermes-cli` toolset if none are specified.
- Enhanced the `GatewayRunner` class to load platform-specific toolsets from the configuration, ensuring that the correct tools are enabled based on the platform being used.

											
										
										
											2026-02-17 23:39:24 -08:00
+								#
 								# Examples:
 								#
 								#   # Use presets (same as defaults):
 								#   platform_toolsets:
 								#     cli: [hermes-cli]
 								#     telegram: [hermes-telegram]
 								#
 								#   # Custom: give Telegram only web + terminal + file + planning:
 								#   platform_toolsets:
 								#     telegram: [web, terminal, file, todo]
 								#
 								#   # Custom: CLI without browser or image gen:
 								#   platform_toolsets:
 								#     cli: [web, terminal, file, skills, todo, tts, cronjob]
 								#
 								#   # Restrictive: Discord gets read-only tools only:
 								#   platform_toolsets:
 								#     discord: [web, vision, skills, todo]
 								#
 								# If not set, defaults are:
-												fix: add missing Platform.SIGNAL to toolset mappings, update test + config docs

Platform.SIGNAL was missing from default_toolset_map and platform_config_key
in gateway/run.py, causing Signal to silently fall back to hermes-telegram
toolset (same bug as HomeAssistant, fixed in PR #538).

Also updates:
- tests/test_toolsets.py: include hermes-signal and hermes-homeassistant in
  the platform core-tools consistency check
- cli-config.yaml.example: document signal and homeassistant platform keys

											
										
										
											2026-03-09 23:27:19 -07:00
+								#   cli:           hermes-cli            (everything + cronjob management)
 								#   telegram:      hermes-telegram       (terminal, file, web, vision, image_gen, tts, browser, skills, todo, cronjob, messaging)
 								#   discord:       hermes-discord        (same as telegram)
 								#   whatsapp:      hermes-whatsapp       (same as telegram)
 								#   slack:         hermes-slack          (same as telegram)
 								#   signal:        hermes-signal         (same as telegram)
 								#   homeassistant: hermes-homeassistant  (same as telegram)
-												feat(gateway): unify QQBot branding, add PLATFORM_HINTS, fix streaming, restore missing setup functions

- Rename platform from 'qq' to 'qqbot' across all integration points
  (Platform enum, toolset, config keys, import paths, file rename qq.py → qqbot.py)
- Add PLATFORM_HINTS for QQBot in prompt_builder (QQ supports markdown)
- Set SUPPORTS_MESSAGE_EDITING = False to skip streaming on QQ
  (prevents duplicate messages from non-editable partial + final sends)
- Add _send_qqbot() standalone send function for cron/send_message tool
- Add interactive _setup_qq() wizard in hermes_cli/setup.py
- Restore missing _setup_signal/email/sms/dingtalk/feishu/wecom/wecom_callback
  functions that were lost during the original merge

											
										
										
											2026-04-14 01:33:06 +08:00
+								#   qqbot:         hermes-qqbot          (same as telegram)
-												Enhance platform toolset configuration and CLI toolset handling

- Introduced a new configuration section in `cli-config.yaml.example` for defining platform-specific toolsets, allowing for greater customization of available tools per platform.
- Updated the CLI to check for user-defined toolsets in the configuration, falling back to the default `hermes-cli` toolset if none are specified.
- Enhanced the `GatewayRunner` class to load platform-specific toolsets from the configuration, ensuring that the correct tools are enabled based on the platform being used.

											
										
										
											2026-02-17 23:39:24 -08:00
+								#
 								platform_toolsets:
 								  cli: [hermes-cli]
 								  telegram: [hermes-telegram]
 								  discord: [hermes-discord]
 								  whatsapp: [hermes-whatsapp]
 								  slack: [hermes-slack]
-												fix: add missing Platform.SIGNAL to toolset mappings, update test + config docs

Platform.SIGNAL was missing from default_toolset_map and platform_config_key
in gateway/run.py, causing Signal to silently fall back to hermes-telegram
toolset (same bug as HomeAssistant, fixed in PR #538).

Also updates:
- tests/test_toolsets.py: include hermes-signal and hermes-homeassistant in
  the platform core-tools consistency check
- cli-config.yaml.example: document signal and homeassistant platform keys

											
										
										
											2026-03-09 23:27:19 -07:00
+								  signal: [hermes-signal]
 								  homeassistant: [hermes-homeassistant]
-												feat(gateway): unify QQBot branding, add PLATFORM_HINTS, fix streaming, restore missing setup functions

- Rename platform from 'qq' to 'qqbot' across all integration points
  (Platform enum, toolset, config keys, import paths, file rename qq.py → qqbot.py)
- Add PLATFORM_HINTS for QQBot in prompt_builder (QQ supports markdown)
- Set SUPPORTS_MESSAGE_EDITING = False to skip streaming on QQ
  (prevents duplicate messages from non-editable partial + final sends)
- Add _send_qqbot() standalone send function for cron/send_message tool
- Add interactive _setup_qq() wizard in hermes_cli/setup.py
- Restore missing _setup_signal/email/sms/dingtalk/feishu/wecom/wecom_callback
  functions that were lost during the original merge

											
										
										
											2026-04-14 01:33:06 +08:00
+								  qqbot: [hermes-qqbot]
-												Enhance platform toolset configuration and CLI toolset handling

- Introduced a new configuration section in `cli-config.yaml.example` for defining platform-specific toolsets, allowing for greater customization of available tools per platform.
- Updated the CLI to check for user-defined toolsets in the configuration, falling back to the default `hermes-cli` toolset if none are specified.
- Enhanced the `GatewayRunner` class to load platform-specific toolsets from the configuration, ensuring that the correct tools are enabled based on the platform being used.

											
										
										
											2026-02-17 23:39:24 -08:00
-												docs(config): document telegram link preview setting

											
										
										
											2026-04-12 22:47:53 -04:00
+								# =============================================================================
 								# Gateway Platform Settings
 								# =============================================================================
 								# Optional per-platform messaging settings.
 								# Platform-specific knobs live under `extra`.
 								#
 								# platforms:
 								#   telegram:
 								#     reply_to_mode: "first"  # off | first | all
 								#     extra:
 								#       disable_link_previews: false  # Set true to suppress Telegram URL previews in bot messages
-												Enhance platform toolset configuration and CLI toolset handling

- Introduced a new configuration section in `cli-config.yaml.example` for defining platform-specific toolsets, allowing for greater customization of available tools per platform.
- Updated the CLI to check for user-defined toolsets in the configuration, falling back to the default `hermes-cli` toolset if none are specified.
- Enhanced the `GatewayRunner` class to load platform-specific toolsets from the configuration, ensuring that the correct tools are enabled based on the platform being used.

											
										
										
											2026-02-17 23:39:24 -08:00
+								# ─────────────────────────────────────────────────────────────────────────────
 								# Available toolsets (use these names in platform_toolsets or with the --toolsets CLI flag)
 								#
 								# Run `hermes chat --list-toolsets` to see all toolsets and their tools.
 								# Run `hermes chat --list-tools` to see every individual tool with descriptions.
 								# ─────────────────────────────────────────────────────────────────────────────
 								#
 								# INDIVIDUAL TOOLSETS (compose your own):
 								#   web          - web_search, web_extract
 								#   search       - web_search only (no scraping)
 								#   terminal     - terminal, process
 								#   file         - read_file, write_file, patch, search
 								#   browser      - browser_navigate, browser_snapshot, browser_click, browser_type,
-												refactor: remove browser_close tool — auto-cleanup handles it (#5792)

* refactor: remove browser_close tool — auto-cleanup handles it

The browser_close tool was called in only 9% of browser sessions (13/144
navigations across 66 sessions), always redundantly — cleanup_browser()
already runs via _cleanup_task_resources() at conversation end, and the
background inactivity reaper catches anything else.

Removing it saves one tool schema slot in every browser-enabled API call.

Also fixes a latent bug: cleanup_browser() now handles Camofox sessions
too (previously only Browserbase). Camofox sessions were never auto-cleaned
per-task because they live in a separate dict from _active_sessions.

Files changed (13):
- tools/browser_tool.py: remove function, schema, registry entry; add
  camofox cleanup to cleanup_browser()
- toolsets.py, model_tools.py, prompt_builder.py, display.py,
  acp_adapter/tools.py: remove browser_close from all tool lists
- tests/: remove browser_close test, update toolset assertion
- docs/skills: remove all browser_close references

* fix: repeat browser_scroll 5x per call for meaningful page movement

Most backends scroll ~100px per call — barely visible on a typical
viewport. Repeating 5x gives ~500px (~half a viewport), making each
scroll tool call actually useful.

Backend-agnostic approach: works across all 7+ browser backends without
needing to configure each one's scroll amount individually. Breaks
early on error for the agent-browser path.

* feat: auto-return compact snapshot from browser_navigate

Every browser session starts with navigate → snapshot. Now navigate
returns the compact accessibility tree snapshot inline, saving one
tool call per browser task.

The snapshot captures the full page DOM (not viewport-limited), so
scroll position doesn't affect it. browser_snapshot remains available
for refreshing after interactions or getting full=true content.

Both Browserbase and Camofox paths auto-snapshot. If the snapshot
fails for any reason, navigation still succeeds — the snapshot is
a bonus, not a requirement.

Schema descriptions updated to guide models: navigate mentions it
returns a snapshot, snapshot mentions it's for refresh/full content.

* refactor: slim cronjob tool schema — consolidate model/provider, drop unused params

Session data (151 calls across 67 sessions) showed several schema
properties were never used by models. Consolidated and cleaned up:

Removed from schema (still work via backend/CLI):
- skill (singular): use skills array instead
- reason: pause-only, unnecessary
- include_disabled: now defaults to true
- base_url: extreme edge case, zero usage
- provider (standalone): merged into model object

Consolidated:
- model + provider → single 'model' object with {model, provider} fields.
  If provider is omitted, the current main provider is pinned at creation
  time so the job stays stable even if the user changes their default.

Kept:
- script: useful data collection feature
- skills array: standard interface for skill loading

Schema shrinks from 14 to 10 properties. All backend functionality
preserved — the Python function signature and handler lambda still
accept every parameter.

* fix: remove mixture_of_agents from core toolsets — opt-in only via hermes tools

MoA was in _HERMES_CORE_TOOLS and composite toolsets (hermes-cli,
hermes-messaging, safe), which meant it appeared in every session
for anyone with OPENROUTER_API_KEY set. The _DEFAULT_OFF_TOOLSETS
gate only works after running 'hermes tools' explicitly.

Now MoA only appears when a user explicitly enables it via
'hermes tools'. The moa toolset definition and check_fn remain
unchanged — it just needs to be opted into.
											
										
										
											2026-04-07 03:28:44 -07:00
+								#                  browser_scroll, browser_back, browser_press,
-												Enhance platform toolset configuration and CLI toolset handling

- Introduced a new configuration section in `cli-config.yaml.example` for defining platform-specific toolsets, allowing for greater customization of available tools per platform.
- Updated the CLI to check for user-defined toolsets in the configuration, falling back to the default `hermes-cli` toolset if none are specified.
- Enhanced the `GatewayRunner` class to load platform-specific toolsets from the configuration, ensuring that the correct tools are enabled based on the platform being used.

											
										
										
											2026-02-17 23:39:24 -08:00
+								#                  browser_get_images, browser_vision  (requires BROWSERBASE_API_KEY)
 								#   vision       - vision_analyze  (requires OPENROUTER_API_KEY)
 								#   image_gen    - image_generate  (requires FAL_KEY)
 								#   skills       - skills_list, skill_view
-												Add Skills Hub — universal skill search, install, and management from online registries

Implements the Hermes Skills Hub with agentskills.io spec compliance,
multi-registry skill discovery, security scanning, and user-driven
management via CLI and /skills slash command.

Core features:
- Security scanner (tools/skills_guard.py): 120 threat patterns across
  12 categories, trust-aware install policy (builtin/trusted/community),
  structural checks, unicode injection detection, LLM audit pass
- Hub client (tools/skills_hub.py): GitHub, ClawHub, Claude Code
  marketplace, and LobeHub source adapters with shared GitHubAuth
  (PAT + gh CLI + GitHub App), lock file provenance tracking, quarantine
  flow, and unified search across all sources
- CLI interface (hermes_cli/skills_hub.py): search, install, inspect,
  list, audit, uninstall, publish (GitHub PR), snapshot export/import,
  and tap management — powers both `hermes skills` and `/skills`

Spec conformance (Phase 0):
- Upgraded frontmatter parser to yaml.safe_load with fallback
- Migrated 39 SKILL.md files: tags/related_skills to metadata.hermes.*
- Added assets/ directory support and compatibility/metadata fields
- Excluded .hub/ from skill discovery in skills_tool.py

Updated 13 config/doc files including README, AGENTS.md, .env.example,
setup wizard, doctor, status, pyproject.toml, and docs.

											
										
										
											2026-02-18 16:09:05 -08:00
+								#   skills_hub   - skill_hub (search/install/manage from online registries — user-driven only)
-												Enhance platform toolset configuration and CLI toolset handling

- Introduced a new configuration section in `cli-config.yaml.example` for defining platform-specific toolsets, allowing for greater customization of available tools per platform.
- Updated the CLI to check for user-defined toolsets in the configuration, falling back to the default `hermes-cli` toolset if none are specified.
- Enhanced the `GatewayRunner` class to load platform-specific toolsets from the configuration, ensuring that the correct tools are enabled based on the platform being used.

											
										
										
											2026-02-17 23:39:24 -08:00
+								#   moa          - mixture_of_agents  (requires OPENROUTER_API_KEY; opt-in via `hermes tools`)
 								#   todo         - todo (in-memory task planning, no deps)
-												feat(tools): add Voxtral TTS provider (Mistral AI)

											
										
										
											2026-04-06 19:04:00 +01:00
+								#   tts          - text_to_speech  (Edge TTS free, or ELEVENLABS/OPENAI/MINIMAX/MISTRAL key)
-												docs: finish cron terminology cleanup

											
										
										
											2026-03-14 19:20:58 -07:00
+								#   cronjob      - cronjob (create/list/update/pause/resume/run/remove scheduled tasks)
-												Enhance platform toolset configuration and CLI toolset handling

- Introduced a new configuration section in `cli-config.yaml.example` for defining platform-specific toolsets, allowing for greater customization of available tools per platform.
- Updated the CLI to check for user-defined toolsets in the configuration, falling back to the default `hermes-cli` toolset if none are specified.
- Enhanced the `GatewayRunner` class to load platform-specific toolsets from the configuration, ensuring that the correct tools are enabled based on the platform being used.

											
										
										
											2026-02-17 23:39:24 -08:00
+								#   rl           - rl_list_environments, rl_start_training, etc. (requires TINKER_API_KEY)
 								#
 								# PRESETS (curated bundles):
 								#   hermes-cli       - All of the above except rl + send_message
 								#   hermes-telegram  - terminal, file, web, vision, image_gen, tts, browser,
 								#                      skills, todo, cronjob, send_message
 								#   hermes-discord   - Same as hermes-telegram
 								#   hermes-whatsapp  - Same as hermes-telegram
 								#   hermes-slack     - Same as hermes-telegram
 								#
 								# COMPOSITE:
 								#   debugging    - terminal + web + file
 								#   safe         - web + vision + moa (no terminal access)
 								#   all          - Everything available
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								#
 								#   web          - Web search and content extraction (web_search, web_extract)
 								#   search       - Web search only, no scraping (web_search)
-												Add todo tool for task management and enhance CLI features

- Introduced a new `todo_tool.py` for planning and tracking multi-step tasks, enhancing the agent's capabilities.
- Updated CLI to include a floating autocomplete dropdown for commands and improved user instructions for better navigation.
- Revised toolsets to incorporate the new `todo` tool and updated documentation to reflect changes in available tools and commands.
- Enhanced user experience with new keybindings and clearer command descriptions in the CLI.

											
										
										
											2026-02-17 23:30:31 -08:00
+								#   terminal     - Command execution and process management (terminal, process)
 								#   file         - File operations: read, write, patch, search
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								#   browser      - Full browser automation (navigate, click, type, screenshot, etc.)
 								#   vision       - Image analysis (vision_analyze)
 								#   image_gen    - Image generation with FLUX (image_generate)
-												Add todo tool for task management and enhance CLI features

- Introduced a new `todo_tool.py` for planning and tracking multi-step tasks, enhancing the agent's capabilities.
- Updated CLI to include a floating autocomplete dropdown for commands and improved user instructions for better navigation.
- Revised toolsets to incorporate the new `todo` tool and updated documentation to reflect changes in available tools and commands.
- Enhanced user experience with new keybindings and clearer command descriptions in the CLI.

											
										
										
											2026-02-17 23:30:31 -08:00
+								#   skills       - Load skill documents (skills_list, skill_view)
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								#   moa          - Mixture of Agents reasoning (mixture_of_agents)
-												Add todo tool for task management and enhance CLI features

- Introduced a new `todo_tool.py` for planning and tracking multi-step tasks, enhancing the agent's capabilities.
- Updated CLI to include a floating autocomplete dropdown for commands and improved user instructions for better navigation.
- Revised toolsets to incorporate the new `todo` tool and updated documentation to reflect changes in available tools and commands.
- Enhanced user experience with new keybindings and clearer command descriptions in the CLI.

											
										
										
											2026-02-17 23:30:31 -08:00
+								#   todo         - Task planning and tracking for multi-step work
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								#   memory       - Persistent memory across sessions (personal notes + user profile)
 								#   session_search - Search and recall past conversations (FTS5 + Gemini Flash summarization)
-												feat(tools): add Voxtral TTS provider (Mistral AI)

											
										
										
											2026-04-06 19:04:00 +01:00
+								#   tts          - Text-to-speech (Edge TTS free, ElevenLabs, OpenAI, MiniMax, Mistral)
-												Add todo tool for task management and enhance CLI features

- Introduced a new `todo_tool.py` for planning and tracking multi-step tasks, enhancing the agent's capabilities.
- Updated CLI to include a floating autocomplete dropdown for commands and improved user instructions for better navigation.
- Revised toolsets to incorporate the new `todo` tool and updated documentation to reflect changes in available tools and commands.
- Enhanced user experience with new keybindings and clearer command descriptions in the CLI.

											
										
										
											2026-02-17 23:30:31 -08:00
+								#   cronjob      - Schedule and manage automated tasks (CLI-only)
 								#   rl           - RL training tools (Tinker-Atropos)
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								#
 								# Composite toolsets:
-												Add todo tool for task management and enhance CLI features

- Introduced a new `todo_tool.py` for planning and tracking multi-step tasks, enhancing the agent's capabilities.
- Updated CLI to include a floating autocomplete dropdown for commands and improved user instructions for better navigation.
- Revised toolsets to incorporate the new `todo` tool and updated documentation to reflect changes in available tools and commands.
- Enhanced user experience with new keybindings and clearer command descriptions in the CLI.

											
										
										
											2026-02-17 23:30:31 -08:00
+								#   debugging    - terminal + web + file (for troubleshooting)
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								#   safe         - web + vision + moa (no terminal access)
-												chore: remove dead top-level toolsets config key

The top-level 'toolsets' key in config.yaml was never read at runtime.
Tool selection uses platform_toolsets (per-platform) or the --toolsets
CLI flag. The key existed in load_cli_config() defaults and the example
config as 'toolsets: [all]', misleading users into thinking it
controlled tool availability.

- Remove from load_cli_config() hardcoded defaults
- Remove from hermes config show output
- Replace in cli-config.yaml.example with deprecation note pointing
  to platform_toolsets and hermes tools

											
										
										
											2026-03-20 22:27:13 -07:00
+								# NOTE: The top-level "toolsets" key is deprecated and ignored.
 								# Tool configuration is managed per-platform via platform_toolsets above.
 								# Use `hermes tools` to configure interactively, or edit platform_toolsets directly.
 								#
 								# CLI override: hermes chat --toolsets terminal,web,file
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
-												docs: add comprehensive MCP documentation and examples

- docs/mcp.md: Full MCP documentation covering prerequisites, configuration,
  transports (stdio + HTTP), security (env filtering, credential stripping),
  reconnection, troubleshooting, popular servers, and advanced usage
- README.md: Add MCP section with quick config example and install instructions
- cli-config.yaml.example: Add commented mcp_servers section with examples
  for stdio, HTTP, and authenticated server configs
- docs/tools.md: Add MCP to Tool Categories table and MCP Tools section
- skills/mcp/native-mcp/SKILL.md: Create native MCP client skill with
  full configuration reference, transport types, security, troubleshooting
- skills/mcp/DESCRIPTION.md: Update category description to cover both
  native MCP client and mcporter bridge approaches

											
										
										
											2026-03-02 18:52:33 -08:00
+								# =============================================================================
 								# MCP (Model Context Protocol) Servers
 								# =============================================================================
 								# Connect to external MCP servers to add tools from the MCP ecosystem.
 								# Each server's tools are automatically discovered and registered.
 								# See docs/mcp.md for full documentation.
 								#
 								# Stdio servers (spawn a subprocess):
 								#   command: the executable to run
 								#   args: command-line arguments
 								#   env: environment variables (only these + safe defaults passed to subprocess)
 								#
 								# HTTP servers (connect to a URL):
 								#   url: the MCP server endpoint
 								#   headers: HTTP headers (e.g., for authentication)
 								#
 								# Optional per-server settings:
 								#   timeout: tool call timeout in seconds (default: 120)
 								#   connect_timeout: initial connection timeout (default: 60)
 								#
 								# mcp_servers:
 								#   time:
 								#     command: uvx
 								#     args: ["mcp-server-time"]
 								#   filesystem:
 								#     command: npx
 								#     args: ["-y", "@modelcontextprotocol/server-filesystem", "/home/user"]
 								#   notion:
 								#     url: https://mcp.notion.com/mcp
 								#   github:
 								#     command: npx
 								#     args: ["-y", "@modelcontextprotocol/server-github"]
 								#     env:
 								#       GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_..."
-												feat(mcp): add sampling support — server-initiated LLM requests (#753)

Add MCP sampling/createMessage capability via SamplingHandler class.

Text-only sampling + tool use in sampling with governance (rate limits,
model whitelist, token caps, tool loop limits). Per-server audit metrics.

Based on concept from PR #366 by eren-karakus0. Restructured as class-based
design with bug fixes and tests using real MCP SDK types.

50 new tests, 2600 total passing.
											
										
										
											2026-03-09 03:37:38 -07:00
+								#
 								# Sampling (server-initiated LLM requests) — enabled by default.
 								# Per-server config under the 'sampling' key:
 								#   analysis:
 								#     command: npx
 								#     args: ["-y", "analysis-server"]
 								#     sampling:
 								#       enabled: true           # default: true
 								#       model: "gemini-3-flash" # override model (optional)
 								#       max_tokens_cap: 4096    # max tokens per request
 								#       timeout: 30             # LLM call timeout (seconds)
 								#       max_rpm: 10             # max requests per minute
 								#       allowed_models: []      # model whitelist (empty = all)
 								#       max_tool_rounds: 5      # tool loop limit (0 = disable)
 								#       log_level: "info"       # audit verbosity
-												docs: add comprehensive MCP documentation and examples

- docs/mcp.md: Full MCP documentation covering prerequisites, configuration,
  transports (stdio + HTTP), security (env filtering, credential stripping),
  reconnection, troubleshooting, popular servers, and advanced usage
- README.md: Add MCP section with quick config example and install instructions
- cli-config.yaml.example: Add commented mcp_servers section with examples
  for stdio, HTTP, and authenticated server configs
- docs/tools.md: Add MCP to Tool Categories table and MCP Tools section
- skills/mcp/native-mcp/SKILL.md: Create native MCP client skill with
  full configuration reference, transport types, security, troubleshooting
- skills/mcp/DESCRIPTION.md: Update category description to cover both
  native MCP client and mcporter bridge approaches

											
										
										
											2026-03-02 18:52:33 -08:00
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								# =============================================================================
 								# Voice Transcription (Speech-to-Text)
 								# =============================================================================
 								# Automatically transcribe voice messages on messaging platforms.
-												feat(tools): add Voxtral Transcribe STT provider (Mistral AI)

											
										
										
											2026-04-06 17:38:25 +01:00
+								# Providers: local (free, faster-whisper) | groq (free tier) | openai (Whisper API) | mistral (Voxtral Transcribe)
 								# Set the corresponding API key in .env: GROQ_API_KEY, OPENAI_API_KEY, or MISTRAL_API_KEY.
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								stt:
 								  enabled: true
-												feat(tools): add Voxtral Transcribe STT provider (Mistral AI)

											
										
										
											2026-04-06 17:38:25 +01:00
+								  # provider: "local"          # auto-detected if omitted
-												fix: STT provider-model mismatch — whisper-1 fed to faster-whisper (#7113)

Legacy flat stt.model config key (from cli-config.yaml.example and older
versions) was passed as a model override to transcribe_audio() by the
gateway, bypassing provider-specific model resolution. When the provider
was 'local' (faster-whisper), this caused:
  ValueError: Invalid model size 'whisper-1'

Changes:
- gateway/run.py, discord.py: stop passing model override — let
  transcribe_audio() handle provider-specific model resolution internally
- get_stt_model_from_config(): now provider-aware, reads from the correct
  nested section (stt.local.model, stt.openai.model, etc.); ignores
  legacy flat key for local provider to prevent model name mismatch
- cli-config.yaml.example: updated STT section to show nested provider
  config structure instead of legacy flat key
- config migration v13→v14: moves legacy stt.model to the correct
  provider section and removes the flat key

Reported by community user on Discord.
											
										
										
											2026-04-10 03:27:30 -07:00
+								  local:
 								    model: "base"              # tiny | base | small | medium | large-v3 | turbo
 								    # language: ""             # auto-detect; set to "en", "es", "fr", etc. to force
 								  openai:
 								    model: "whisper-1"         # whisper-1 | gpt-4o-mini-transcribe | gpt-4o-transcribe
-												feat(tools): add Voxtral Transcribe STT provider (Mistral AI)

											
										
										
											2026-04-06 17:38:25 +01:00
+								  # mistral:
 								  #   model: "voxtral-mini-latest"  # voxtral-mini-latest | voxtral-mini-2602
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
 								# =============================================================================
 								# Response Pacing (Messaging Platforms)
 								# =============================================================================
 								# Add human-like delays between message chunks.
 								# human_delay:
 								#   mode: "off"      # "off" | "natural" | "custom"
 								#   min_ms: 800      # Min delay (custom mode only)
 								#   max_ms: 2500     # Max delay (custom mode only)
-												Enhance session logging and interactive sudo support

- Implemented automatic session logging, saving conversation trajectories to the `logs/` directory in JSON format, with each session having a unique identifier.
- Updated the CLI to display the session ID in the welcome banner for easy reference.
- Introduced an interactive sudo password prompt in CLI mode, allowing users to enter their password with a 45-second timeout, enhancing user experience during command execution.
- Documented session logging and interactive sudo features in `README.md`, `cli.md`, and `cli-config.yaml.example` for better user guidance.

											
										
										
											2026-02-01 15:36:26 -08:00
+								# =============================================================================
 								# Session Logging
 								# =============================================================================
 								# Session trajectories are automatically saved to the logs/ directory.
 								# Each session creates: logs/session_YYYYMMDD_HHMMSS_UUID.json
 								#
 								# The session ID is displayed in the welcome banner for easy reference.
 								# Logs contain full conversation history in trajectory format:
 								# - System prompt, user messages, assistant responses
 								# - Tool calls with inputs/outputs
 								# - Timestamps for debugging
 								#
 								# No configuration needed - logging is always enabled.
 								# To disable, you would need to modify the source code.
-												feat: implement subagent delegation for task management

- Introduced the `delegate_task` tool, allowing the main agent to spawn child AIAgent instances with isolated context for complex tasks.
- Supported both single-task and batch processing (up to 3 concurrent tasks) to enhance task management capabilities.
- Updated configuration options for delegation, including maximum iterations and default toolsets for subagents.
- Enhanced documentation to provide clear guidance on using the delegation feature and its configuration.
- Added comprehensive tests to ensure the functionality and reliability of the delegation logic.

											
										
										
											2026-02-20 03:15:53 -08:00
+								# =============================================================================
 								# Code Execution Sandbox (Programmatic Tool Calling)
 								# =============================================================================
 								# The execute_code tool runs Python scripts that call Hermes tools via RPC.
 								# Intermediate tool results stay out of the LLM's context window.
 								code_execution:
 								  timeout: 300         # Max seconds per script before kill (default: 300 = 5 min)
 								  max_tool_calls: 50   # Max RPC tool calls per execution (default: 50)
 								# =============================================================================
 								# Subagent Delegation
 								# =============================================================================
 								# The delegate_task tool spawns child agents with isolated context.
-												feat(delegate): orchestrator role and configurable spawn depth (default flat)

Adds role='leaf'|'orchestrator' to delegate_task. With max_spawn_depth>=2,
an orchestrator child retains the 'delegation' toolset and can spawn its
own workers; leaf children cannot delegate further (identical to today).

Default posture is flat — max_spawn_depth=1 means a depth-0 parent's
children land at the depth-1 floor and orchestrator role silently
degrades to leaf. Users opt into nested delegation by raising
max_spawn_depth to 2 or 3 in config.yaml.

Also threads acp_command/acp_args through the main agent loop's delegate
dispatch (previously silently dropped in the schema) via a new
_dispatch_delegate_task helper, and adds a DelegateEvent enum with
legacy-string back-compat for gateway/ACP/CLI progress consumers.

Config (hermes_cli/config.py defaults):
  delegation.max_concurrent_children: 3   # floor-only, no upper cap
  delegation.max_spawn_depth: 1           # 1=flat (default), 2-3 unlock nested
  delegation.orchestrator_enabled: true   # global kill switch

Salvaged from @pefontana's PR #11215. Overrides vs. the original PR:
concurrency stays at 3 (PR bumped to 5 + cap 8 — we keep the floor only,
no hard ceiling); max_spawn_depth defaults to 1 (PR defaulted to 2 which
silently enabled one level of orchestration for every user).

Co-authored-by: pefontana <fontana.pedro93@gmail.com>

											
										
										
											2026-04-21 14:11:53 -07:00
+								# Supports single tasks and batch mode (default 3 parallel, configurable).
-												feat: implement subagent delegation for task management

- Introduced the `delegate_task` tool, allowing the main agent to spawn child AIAgent instances with isolated context for complex tasks.
- Supported both single-task and batch processing (up to 3 concurrent tasks) to enhance task management capabilities.
- Updated configuration options for delegation, including maximum iterations and default toolsets for subagents.
- Enhanced documentation to provide clear guidance on using the delegation feature and its configuration.
- Added comprehensive tests to ensure the functionality and reliability of the delegation logic.

											
										
										
											2026-02-20 03:15:53 -08:00
+								delegation:
-												fix(delegate_tool): update max_iterations in documentation and example config to reflect default value of 50

											
										
										
											2026-03-02 00:52:01 -08:00
+								  max_iterations: 50                          # Max tool-calling turns per child (default: 50)
-												feat(delegate): orchestrator role and configurable spawn depth (default flat)

Adds role='leaf'|'orchestrator' to delegate_task. With max_spawn_depth>=2,
an orchestrator child retains the 'delegation' toolset and can spawn its
own workers; leaf children cannot delegate further (identical to today).

Default posture is flat — max_spawn_depth=1 means a depth-0 parent's
children land at the depth-1 floor and orchestrator role silently
degrades to leaf. Users opt into nested delegation by raising
max_spawn_depth to 2 or 3 in config.yaml.

Also threads acp_command/acp_args through the main agent loop's delegate
dispatch (previously silently dropped in the schema) via a new
_dispatch_delegate_task helper, and adds a DelegateEvent enum with
legacy-string back-compat for gateway/ACP/CLI progress consumers.

Config (hermes_cli/config.py defaults):
  delegation.max_concurrent_children: 3   # floor-only, no upper cap
  delegation.max_spawn_depth: 1           # 1=flat (default), 2-3 unlock nested
  delegation.orchestrator_enabled: true   # global kill switch

Salvaged from @pefontana's PR #11215. Overrides vs. the original PR:
concurrency stays at 3 (PR bumped to 5 + cap 8 — we keep the floor only,
no hard ceiling); max_spawn_depth defaults to 1 (PR defaulted to 2 which
silently enabled one level of orchestration for every user).

Co-authored-by: pefontana <fontana.pedro93@gmail.com>

											
										
										
											2026-04-21 14:11:53 -07:00
+								  # max_concurrent_children: 3                # Max parallel child agents (default: 3)
 								  # max_spawn_depth: 1                        # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
 								  # orchestrator_enabled: true                # Kill switch for role="orchestrator" children (default: true).
-												fix(delegate): default inherit_mcp_toolsets=true, drop version bump

Follow-up on helix4u's PR #14211:
- Flip default to true: narrowing toolsets=['web','browser'] expresses
  'I want these extras', not 'silently strip MCP'. Parent MCP tools
  (registered at runtime) should survive narrowing by default.
- Drop _config_version bump (22->23); additive nested key under
  delegation.* is handled by _deep_merge, no migration needed.
- Update tests to reflect new default behavior.

											
										
										
											2026-04-22 17:44:52 -07:00
+								  # inherit_mcp_toolsets: true                # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
-												feat: configurable subagent provider:model with full credential resolution

Adds delegation.model and delegation.provider config fields so subagents
can run on a completely different provider:model pair than the parent agent.

When delegation.provider is set, the system resolves the full credential
bundle (base_url, api_key, api_mode) via resolve_runtime_provider() —
the same path used by CLI/gateway startup. This means all configured
providers work out of the box: openrouter, nous, zai, kimi-coding,
minimax, minimax-cn.

Key design decisions:
- Provider resolution uses hermes_cli.runtime_provider (single source of
  truth for credential resolution across CLI, gateway, cron, and now
  delegation)
- When only delegation.model is set (no provider), the model name changes
  but parent credentials are inherited (for switching models within the
  same provider like OpenRouter)
- When delegation.provider is set, full credentials are resolved
  independently — enabling cross-provider delegation (e.g. parent on
  Nous Portal, subagents on OpenRouter)
- Clear error messages if provider resolution fails (missing API key,
  unknown provider name)
- _load_config() now falls back to hermes_cli.config.load_config() for
  gateway/cron contexts where CLI_CONFIG is unavailable

Based on PR #791 by 0xbyt4 (closes #609), reworked to use proper
provider credential resolution instead of passing provider as metadata.

Co-authored-by: 0xbyt4 <0xbyt4@users.noreply.github.com>

											
										
										
											2026-03-11 06:12:21 -07:00
+								  # model: "google/gemini-3-flash-preview"    # Override model for subagents (empty = inherit parent)
 								  # provider: "openrouter"                    # Override provider for subagents (empty = inherit parent)
 								  #                                           # Resolves full credentials (base_url, api_key) automatically.
 								  #                                           # Supported: openrouter, nous, zai, kimi-coding, minimax, minimax-cn
-												feat: implement subagent delegation for task management

- Introduced the `delegate_task` tool, allowing the main agent to spawn child AIAgent instances with isolated context for complex tasks.
- Supported both single-task and batch processing (up to 3 concurrent tasks) to enhance task management capabilities.
- Updated configuration options for delegation, including maximum iterations and default toolsets for subagents.
- Enhanced documentation to provide clear guidance on using the delegation feature and its configuration.
- Added comprehensive tests to ensure the functionality and reliability of the delegation logic.

											
										
										
											2026-02-20 03:15:53 -08:00
-												feat: add Honcho integration for cross-session user modeling

											
										
										
											2026-02-27 23:41:08 -08:00
+								# =============================================================================
 								# Honcho Integration (Cross-Session User Modeling)
 								# =============================================================================
 								# AI-native persistent memory via Honcho (https://honcho.dev/).
 								# Builds a deeper understanding of the user across sessions and tools.
 								# Runs alongside USER.md — additive, not a replacement.
 								#
 								# Requires: pip install honcho-ai
 								# Config: ~/.honcho/config.json (shared with Claude Code, Cursor, etc.)
 								# API key: HONCHO_API_KEY in ~/.hermes/.env or ~/.honcho/config.json
 								#
 								# Hermes-specific overrides (optional — most config comes from ~/.honcho/config.json):
 								# honcho: {}
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								# =============================================================================
 								# Display
 								# =============================================================================
 								display:
 								  # Use compact banner mode
 								  compact: false
-												refactor: migrate tool progress configuration from environment variables to config.yaml

											
										
										
											2026-02-28 00:05:58 -08:00
 								  # Tool progress display level (CLI and gateway)
 								  #   off:     Silent — no tool activity shown, just the final response
 								  #   new:     Show a tool indicator only when the tool changes (skip repeats)
 								  #   all:     Show every tool call with a short preview (default)
 								  #   verbose: Full args, results, and debug logs (same as /verbose)
 								  # Toggle at runtime with /verbose in the CLI
 								  tool_progress: all
-												feat: bell_on_complete — terminal bell when agent finishes

Adds a simple config option to play the terminal bell (\a) when the
agent finishes a response. Useful for long-running tasks — switch to
another window and your terminal will ding when done.

Works over SSH since the bell character propagates through the
connection. Most terminal emulators can be configured to flash the
taskbar, play a sound, or show a visual indicator on bell.

Config (default: off):
  display:
    bell_on_complete: true

Closes #318

											
										
										
											2026-03-08 19:41:17 -07:00
-												feat(gateway): surface natural mid-turn assistant messages in chat platforms

Add display.interim_assistant_messages config (enabled by default) that
forwards completed assistant commentary between tool calls to the user
as separate chat messages. Models already emit useful status text like
'I'll inspect the repo first.' — this surfaces it on Telegram, Discord,
and other messaging platforms instead of swallowing it.

Independent from tool_progress and gateway streaming. Disabled for
webhooks. Uses GatewayStreamConsumer when available, falls back to
direct adapter send. Tracks response_previewed to prevent double-delivery
when interim message matches the final response.

Also fixes: cursor not stripped from fallback prefix in stream consumer
(affected continuation calculation on no-edit platforms like Signal).

Cherry-picked from PR #7885 by asheriif, default changed to enabled.
Fixes #5016

											
										
										
											2026-04-11 16:03:52 -07:00
+								  # Gateway-only natural mid-turn assistant updates.
 								  # When true, completed assistant status messages are sent as separate chat
 								  # messages. This is independent of tool_progress and gateway streaming.
 								  interim_assistant_messages: true
-												feat(cli): configurable busy input mode + fix /queue always working (#3298)

Two changes:

1. Fix /queue command: remove the _agent_running guard that rejected
   /queue after the agent finished. The prompt was deferred in
   _pending_input until the agent completed, then the handler checked
   _agent_running (now False) and rejected it. /queue now always queues
   regardless of timing.

2. Add display.busy_input_mode config (CLI-only):
   - 'interrupt' (default): Enter while busy interrupts the current run
     (preserves existing behavior)
   - 'queue': Enter while busy queues the message for the next turn,
     with a 'Queued for the next turn: ...' confirmation
   Ctrl+C always interrupts regardless of this setting.

Salvaged from PR #3037 by StefanoChiodino. Key differences:
- Default is 'interrupt' (preserves existing behavior) not 'queue'
- No config version bump (unnecessary for new key in existing section)
- Simpler normalization (no alias map)
- /queue fix is simpler: just remove the guard instead of intercepting
  commands during busy state
											
										
										
											2026-03-26 17:58:40 -07:00
+								  # What Enter does when Hermes is already busy in the CLI.
 								  #   interrupt: Interrupt the current run and redirect Hermes (default)
 								  #   queue:     Queue your message for the next turn
 								  # Ctrl+C always interrupts regardless of this setting.
 								  busy_input_mode: interrupt
-												feat(gateway): configurable background process watcher notifications

Add display.background_process_notifications config option to control
how chatty the gateway process watcher is when using
terminal(background=true, check_interval=...) from messaging platforms.

Modes:
  - all:    running-output updates + final message (default, current behavior)
  - result: only the final completion message
  - error:  only the final message when exit code != 0
  - off:    no watcher messages at all

Also supports HERMES_BACKGROUND_NOTIFICATIONS env var override.

Includes 12 tests (5 config loading + 7 watcher behavior).

Inspired by @PeterFile's PR #593. Closes #592.

											
										
										
											2026-03-10 04:12:39 -07:00
+								  # Background process notifications (gateway/messaging only).
 								  # Controls how chatty the process watcher is when you use
-												refactor(terminal): remove check_interval parameter (#8001)

The check_interval parameter on terminal_tool sent periodic output
updates to the gateway chat, but these were display-only — the agent
couldn't see or act on them. This added schema bloat and introduced
a bug where notify_on_complete=True was silently dropped when
check_interval was also set (the not-check_interval guard skipped
fast-watcher registration, and the check_interval watcher dict
was missing the notify_on_complete key).

Removing check_interval entirely:
- Eliminates the notify_on_complete interaction bug
- Reduces tool schema size (one fewer parameter for the model)
- Simplifies the watcher registration path
- notify_on_complete (agent wake-on-completion) still works
- watch_patterns (output alerting) still works
- process(action='poll') covers manual status checking

Closes #7947 (root cause eliminated rather than patched).
											
										
										
											2026-04-11 17:16:11 -07:00
+								  # terminal(background=true, notify_on_complete=true) from Telegram/Discord/etc.
-												feat(gateway): configurable background process watcher notifications

Add display.background_process_notifications config option to control
how chatty the gateway process watcher is when using
terminal(background=true, check_interval=...) from messaging platforms.

Modes:
  - all:    running-output updates + final message (default, current behavior)
  - result: only the final completion message
  - error:  only the final message when exit code != 0
  - off:    no watcher messages at all

Also supports HERMES_BACKGROUND_NOTIFICATIONS env var override.

Includes 12 tests (5 config loading + 7 watcher behavior).

Inspired by @PeterFile's PR #593. Closes #592.

											
										
										
											2026-03-10 04:12:39 -07:00
+								  #   off:     No watcher messages at all
 								  #   result:  Only the final completion message
 								  #   error:   Only the final message when exit code != 0
 								  #   all:     Running output updates + final message (default)
 								  background_process_notifications: all
-												feat(honcho): async memory integration with prefetch pipeline and recallMode

Adds full Honcho memory integration to Hermes:

- Session manager with async background writes, memory modes (honcho/hybrid/local),
  and dialectic prefetch for first-turn context warming
- Agent integration: prefetch pipeline, tool surface gated by recallMode,
  system prompt context injection, SIGTERM/SIGINT flush handlers
- CLI commands: setup, status, mode, tokens, peer, identity, migrate
- recallMode setting (auto | context | tools) for A/B testing retrieval strategies
- Session strategies: per-session, per-repo (git tree root), per-directory, global
- Polymorphic memoryMode config: string shorthand or per-peer object overrides
- 97 tests covering async writes, client config, session resolution, and memory modes

											
										
										
											2026-03-09 15:58:22 -04:00
-												feat: bell_on_complete — terminal bell when agent finishes

Adds a simple config option to play the terminal bell (\a) when the
agent finishes a response. Useful for long-running tasks — switch to
another window and your terminal will ding when done.

Works over SSH since the bell character propagates through the
connection. Most terminal emulators can be configured to flash the
taskbar, play a sound, or show a visual indicator on bell.

Config (default: off):
  display:
    bell_on_complete: true

Closes #318

											
										
										
											2026-03-08 19:41:17 -07:00
+								  # Play terminal bell when agent finishes a response.
 								  # Useful for long-running tasks — your terminal will ding when the agent is done.
 								  # Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
 								  bell_on_complete: false
-												docs: comprehensive skin/theme system documentation

- AGENTS.md: add Skin/Theme System section with architecture, skinnable
  elements table, built-in skins list, adding built-in/user skins guide,
  YAML example; add skin_engine.py to project structure; mention skin
  engine in CLI Architecture section
- CONTRIBUTING.md: add skin_engine.py to project structure; add 'Adding
  a Skin/Theme' section with YAML schema, activation instructions
- cli-config.yaml.example: add full skin config documentation with
  schema reference, built-in skins list, all color/spinner/branding keys
- docs/skins/example-skin.yaml: complete annotated skin template with
  all available fields and inline documentation
- hermes_cli/skin_engine.py: expand module docstring to full schema
  reference with all fields documented, usage examples, built-in skins
  list

											
										
										
											2026-03-10 00:51:27 -07:00
-												feat(cli): add /reasoning command for effort level and display toggle

Combined implementation of reasoning management:
- /reasoning              Show current effort level and display state
- /reasoning <level>      Set reasoning effort (none, low, medium, high, xhigh)
- /reasoning show|on      Show model thinking/reasoning in output
- /reasoning hide|off     Hide model thinking/reasoning from output

Effort level changes persist to config and force agent re-init.
Display toggle updates the agent callback dynamically without re-init.

When display is enabled:
- Intermediate reasoning shown as dim [thinking] lines during tool loops
- Final reasoning shown in a bordered box above the response
- Long reasoning collapsed (5 lines intermediate, 10 lines final)

Also adds:
- reasoning_callback parameter to AIAgent
- last_reasoning in run_conversation result dict
- show_reasoning config option (display section, default: false)
- Display section in /config output
- 34 tests covering both features

Combines functionality from PR #789 and PR #790.

Co-authored-by: Aum Desai <Aum08Desai@users.noreply.github.com>
Co-authored-by: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>

											
										
										
											2026-03-11 05:53:21 -07:00
+								  # Show model reasoning/thinking before each response.
 								  # When enabled, a dim box shows the model's thought process above the response.
 								  # Toggle at runtime with /reasoning show or /reasoning hide.
 								  show_reasoning: false
-												docs: add streaming config to cli-config.yaml.example and defaults

Documents the new streaming options in the example config:
- display.streaming for CLI (under display section)
- streaming.enabled + transport/interval/threshold/cursor for gateway
- Added streaming: false to load_cli_config() defaults dict

											
										
										
											2026-03-16 07:53:08 -07:00
+								  # Stream tokens to the terminal as they arrive instead of waiting for the
 								  # full response. The response box opens on first token and text appears
 								  # line-by-line. Tool calls are still captured silently.
-												feat: enable streaming by default in CLI

Streaming provides a better UX — tokens appear as they arrive instead
of waiting for the full response. show_reasoning remains false so
thinking blocks are not streamed to the user.

											
										
										
											2026-03-21 09:49:47 -07:00
+								  # Stream tokens to the terminal in real-time. Disable to wait for full responses.
 								  streaming: true
-												docs: add streaming config to cli-config.yaml.example and defaults

Documents the new streaming options in the example config:
- display.streaming for CLI (under display section)
- streaming.enabled + transport/interval/threshold/cursor for gateway
- Added streaming: false to load_cli_config() defaults dict

											
										
										
											2026-03-16 07:53:08 -07:00
-												docs: comprehensive skin/theme system documentation

- AGENTS.md: add Skin/Theme System section with architecture, skinnable
  elements table, built-in skins list, adding built-in/user skins guide,
  YAML example; add skin_engine.py to project structure; mention skin
  engine in CLI Architecture section
- CONTRIBUTING.md: add skin_engine.py to project structure; add 'Adding
  a Skin/Theme' section with YAML schema, activation instructions
- cli-config.yaml.example: add full skin config documentation with
  schema reference, built-in skins list, all color/spinner/branding keys
- docs/skins/example-skin.yaml: complete annotated skin template with
  all available fields and inline documentation
- hermes_cli/skin_engine.py: expand module docstring to full schema
  reference with all fields documented, usage examples, built-in skins
  list

											
										
										
											2026-03-10 00:51:27 -07:00
+								  # ───────────────────────────────────────────────────────────────────────────
 								  # Skin / Theme
 								  # ───────────────────────────────────────────────────────────────────────────
 								  # Customize CLI visual appearance — banner colors, spinner faces, tool prefix,
 								  # response box label, and branding text. Change at runtime with /skin <name>.
 								  #
 								  # Built-in skins:
 								  #   default  — Classic Hermes gold/kawaii
 								  #   ares     — Crimson/bronze war-god theme with spinner wings
 								  #   mono     — Clean grayscale monochrome
 								  #   slate    — Cool blue developer-focused
 								  #
 								  # Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
 								  # Schema (all fields optional, missing values inherit from default):
 								  #
 								  #   name: my-theme
 								  #   description: Short description
 								  #   colors:
 								  #     banner_border: "#HEX"    # Panel border
 								  #     banner_title: "#HEX"     # Panel title
 								  #     banner_accent: "#HEX"    # Section headers (Available Tools, etc.)
 								  #     banner_dim: "#HEX"       # Dim/muted text
 								  #     banner_text: "#HEX"      # Body text (tool names, skill names)
 								  #     ui_accent: "#HEX"        # UI accent color
 								  #     response_border: "#HEX"  # Response box border color
 								  #   spinner:
 								  #     waiting_faces: ["(⚔)", "(⛨)"]       # Faces shown while waiting
 								  #     thinking_faces: ["(⚔)", "(⌁)"]      # Faces shown while thinking
 								  #     thinking_verbs: ["forging", "plotting"]  # Verbs for spinner messages
 								  #     wings:                                # Optional left/right spinner decorations
 								  #       - ["⟪⚔", "⚔⟫"]
 								  #       - ["⟪▲", "▲⟫"]
 								  #   branding:
 								  #     agent_name: "My Agent"               # Banner title and branding
 								  #     welcome: "Welcome message"           # Shown at CLI startup
 								  #     response_label: " ⚔ Agent "         # Response box header label
 								  #     prompt_symbol: "⚔ ❯ "              # Prompt symbol
 								  #   tool_prefix: "╎"                       # Tool output line prefix (default: ┊)
 								  #
 								  skin: default
-												feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

											
										
										
											2026-03-16 05:48:45 -07:00
-												fix: Ollama Cloud auth, /model switch persistence, and alias tab completion

- Add OLLAMA_API_KEY to credential resolution chain for ollama.com endpoints
- Update requested_provider/_explicit_api_key/_explicit_base_url after /model
  switch so _ensure_runtime_credentials() doesn't revert the switch
- Pass base_url/api_key from fallback config to resolve_provider_client()
- Add DirectAlias system: user-configurable model_aliases in config.yaml
  checked before catalog resolution, with reverse lookup by model ID
- Add /model tab completion showing aliases with provider metadata

Co-authored-by: LucidPaths <LucidPaths@users.noreply.github.com>

											
										
										
											2026-04-05 10:58:44 -07:00
+								# =============================================================================
 								# Model Aliases — short names for /model command
 								# =============================================================================
 								# Map short aliases to exact (model, provider, base_url) tuples.
 								# Used by /model tab completion and resolve_alias().
 								# Aliases are checked BEFORE the models.dev catalog, so they can route
 								# to endpoints not in the catalog (e.g. Ollama Cloud, local servers).
 								#
 								# model_aliases:
 								#   opus:
 								#     model: claude-opus-4-6
 								#     provider: anthropic
 								#   qwen:
 								#     model: "qwen3.5:397b"
 								#     provider: custom
 								#     base_url: "https://ollama.com/v1"
 								#   glm:
 								#     model: glm-4.7
 								#     provider: custom
 								#     base_url: "https://ollama.com/v1"
-												feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

											
										
										
											2026-03-16 05:48:45 -07:00
+								# =============================================================================
 								# Privacy
 								# =============================================================================
 								# privacy:
 								#   # Redact PII from the LLM context prompt.
 								#   # When true, phone numbers and user/chat IDs are replaced with
 								#   # deterministic hashes before being sent to the model.
 								#   # Names and usernames are NOT affected (user-chosen, publicly visible).
 								#   # Routing/delivery still uses the original values internally.
 								#   redact_pii: false
-												feat: shell hooks — wire shell scripts as Hermes hook callbacks

Users can declare shell scripts in config.yaml under a hooks: block that
fire on plugin-hook events (pre_tool_call, post_tool_call, pre_llm_call,
subagent_stop, etc). Scripts receive JSON on stdin, can return JSON on
stdout to block tool calls or inject context pre-LLM.

Key design:
- Registers closures on existing PluginManager._hooks dict — zero changes
  to invoke_hook() call sites
- subprocess.run(shell=False) via shlex.split — no shell injection
- First-use consent per (event, command) pair, persisted to allowlist JSON
- Bypass via --accept-hooks, HERMES_ACCEPT_HOOKS=1, or hooks_auto_accept
- hermes hooks list/test/revoke/doctor CLI subcommands
- Adds subagent_stop hook event fired after delegate_task children exit
- Claude Code compatible response shapes accepted

Cherry-picked from PR #13143 by @pefontana.

											
										
										
											2026-04-20 20:53:20 -07:00
 								# =============================================================================
 								# Shell-script hooks
 								# =============================================================================
 								# Register shell scripts as plugin-hook callbacks.  Each entry is executed as
 								# a subprocess (shell=False, shlex.split) with a JSON payload on stdin.  On
 								# stdout the script may return JSON that either blocks the tool call or
 								# injects context into the next LLM call.
 								#
 								# Valid events (mirror hermes_cli.plugins.VALID_HOOKS):
 								#   pre_tool_call, post_tool_call, pre_llm_call, post_llm_call,
 								#   pre_api_request, post_api_request, on_session_start, on_session_end,
 								#   on_session_finalize, on_session_reset, subagent_stop
 								#
 								# First-use consent: each (event, command) pair prompts once on a TTY, then
 								# is persisted to ~/.hermes/shell-hooks-allowlist.json.  Non-interactive
 								# runs (gateway, cron) need --accept-hooks, HERMES_ACCEPT_HOOKS=1, or the
 								# hooks_auto_accept key below.
 								#
 								# See website/docs/user-guide/features/hooks.md for the full JSON wire
 								# protocol and worked examples.
 								#
 								# hooks:
 								#   pre_tool_call:
 								#     - matcher: "terminal"
 								#       command: "~/.hermes/agent-hooks/block-rm-rf.sh"
 								#       timeout: 10
 								#   post_tool_call:
 								#     - matcher: "write_file|patch"
 								#       command: "~/.hermes/agent-hooks/auto-format.sh"
 								#   pre_llm_call:
 								#     - command: "~/.hermes/agent-hooks/inject-cwd-context.sh"
 								#   subagent_stop:
 								#     - command: "~/.hermes/agent-hooks/log-orchestration.sh"
 								#
 								# hooks_auto_accept: false