diff --git a/cli-config.yaml.example b/cli-config.yaml.example index ec7ccb6209..fb1af78fcd 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -555,6 +555,21 @@ toolsets: # args: ["-y", "@modelcontextprotocol/server-github"] # env: # GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_..." +# +# Sampling (server-initiated LLM requests) — enabled by default. +# Per-server config under the 'sampling' key: +# analysis: +# command: npx +# args: ["-y", "analysis-server"] +# sampling: +# enabled: true # default: true +# model: "gemini-3-flash" # override model (optional) +# max_tokens_cap: 4096 # max tokens per request +# timeout: 30 # LLM call timeout (seconds) +# max_rpm: 10 # max requests per minute +# allowed_models: [] # model whitelist (empty = all) +# max_tool_rounds: 5 # tool loop limit (0 = disable) +# log_level: "info" # audit verbosity # ============================================================================= # Voice Transcription (Speech-to-Text) diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index b2f0f5290f..deb87d4835 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -29,6 +29,18 @@ Example config:: headers: Authorization: "Bearer sk-..." timeout: 180 + analysis: + command: "npx" + args: ["-y", "analysis-server"] + sampling: # server-initiated LLM requests + enabled: true # default: true + model: "gemini-3-flash" # override model (optional) + max_tokens_cap: 4096 # max tokens per request + timeout: 30 # LLM call timeout (seconds) + max_rpm: 10 # max requests per minute + allowed_models: [] # model whitelist (empty = all) + max_tool_rounds: 5 # tool loop limit (0 = disable) + log_level: "info" # audit verbosity Features: - Stdio transport (command + args) and HTTP/StreamableHTTP transport (url) @@ -37,6 +49,8 @@ Features: - Credential stripping in error messages returned to the LLM - Configurable per-server timeouts for tool calls and connections - Thread-safe architecture with dedicated background event loop + - Sampling support: MCP servers can request LLM completions via + sampling/createMessage (text and tool-use responses) Architecture: A dedicated background event loop (_mcp_loop) runs in a daemon thread.