diff --git a/cli-config.yaml.example b/cli-config.yaml.example
index ec7ccb6209..fb1af78fcd 100644
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -555,6 +555,21 @@ toolsets:
 #     args: ["-y", "@modelcontextprotocol/server-github"]
 #     env:
 #       GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_..."
+#
+# Sampling (server-initiated LLM requests) — enabled by default.
+# Per-server config under the 'sampling' key:
+#   analysis:
+#     command: npx
+#     args: ["-y", "analysis-server"]
+#     sampling:
+#       enabled: true           # default: true
+#       model: "gemini-3-flash" # override model (optional)
+#       max_tokens_cap: 4096    # max tokens per request
+#       timeout: 30             # LLM call timeout (seconds)
+#       max_rpm: 10             # max requests per minute
+#       allowed_models: []      # model whitelist (empty = all)
+#       max_tool_rounds: 5      # tool loop limit (0 = disable)
+#       log_level: "info"       # audit verbosity
 
 # =============================================================================
 # Voice Transcription (Speech-to-Text)
diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py
index b2f0f5290f..deb87d4835 100644
--- a/tools/mcp_tool.py
+++ b/tools/mcp_tool.py
@@ -29,6 +29,18 @@ Example config::
         headers:
           Authorization: "Bearer sk-..."
         timeout: 180
+      analysis:
+        command: "npx"
+        args: ["-y", "analysis-server"]
+        sampling:                    # server-initiated LLM requests
+          enabled: true              # default: true
+          model: "gemini-3-flash"    # override model (optional)
+          max_tokens_cap: 4096       # max tokens per request
+          timeout: 30                # LLM call timeout (seconds)
+          max_rpm: 10                # max requests per minute
+          allowed_models: []         # model whitelist (empty = all)
+          max_tool_rounds: 5         # tool loop limit (0 = disable)
+          log_level: "info"          # audit verbosity
 
 Features:
     - Stdio transport (command + args) and HTTP/StreamableHTTP transport (url)
@@ -37,6 +49,8 @@ Features:
     - Credential stripping in error messages returned to the LLM
     - Configurable per-server timeouts for tool calls and connections
     - Thread-safe architecture with dedicated background event loop
+    - Sampling support: MCP servers can request LLM completions via
+      sampling/createMessage (text and tool-use responses)
 
 Architecture:
     A dedicated background event loop (_mcp_loop) runs in a daemon thread.