Compare commits

...

1 Commits

Author SHA1 Message Date
Teknium
9a85c99d58 fix(mcp): validate remote URLs up-front with a clear error
Port from anomalyco/opencode#25019 ("fix: handle invalid mcp urls").

Previously: a typo in `config.yaml` (missing scheme, wrong scheme,
empty string, non-string value) slipped past `_is_http()` and hit
`httpx.URL(url)` or `streamablehttp_client(url, ...)` deep in the
transport layer. That raised a generic exception which went through
the reconnect-backoff loop, so a bad URL caused _MAX_INITIAL_CONNECT_RETRIES
attempts with doubling backoff — about a minute of pointless retries
plus an opaque error — before the server was marked failed.

Now: we validate the URL once, at the top of `run()`, before
entering the retry loop. A malformed URL raises `InvalidMcpUrlError`
(a `ValueError` subclass) with a message that names the offending
server and explains exactly what was wrong. `_ready` is set and
`_error` is populated, so `start()` re-raises and the server shows
up as failed in `hermes mcp list` without any backoff burn.

Validation rules:
- Must be a string (rejects None, dict, int)
- Must be non-empty (rejects '' and whitespace-only)
- Scheme must be http or https (rejects file://, ws://, stdio://)
- Must have a non-empty host (rejects http:///, http://:8080)

Tests (21 new cases in tests/tools/test_mcp_invalid_url.py):
- TestValidUrlsAccepted: http, https, IPv6, ports, paths, query strings
- TestInvalidUrlsRejected: every rejection path above + clear error text
- TestErrorIsValueError: downstream code catching ValueError still works

E2E verified: a misconfigured server with `url: not-a-valid-url`
now fails in <0.001s with the clear error, instead of minutes of retries.

Doesn't touch stdio servers (they use `command`, not `url`) — the
validator only fires when `_is_http()` returns True.
2026-04-30 17:11:02 -07:00
2 changed files with 202 additions and 0 deletions

View File

@@ -0,0 +1,125 @@
"""Tests for the MCP remote-URL validator.
Ported from anomalyco/opencode#25019 (``fix: handle invalid mcp urls``).
Previously, a typo in ``config.yaml`` (missing scheme, wrong scheme, empty
string, dict where a URL was expected) caused the MCP server startup code
to enter httpx's URL-parsing path and crash inside the transport layer.
The reconnect-backoff loop would then retry
``_MAX_INITIAL_CONNECT_RETRIES`` times with doubling backoff — a minute or
more of pointless retries plus a confusing opaque error message — before
eventually giving up.
The fix validates the URL once, up front, and fails fast with a specific
error message identifying the offending server.
"""
from __future__ import annotations
import pytest
from tools.mcp_tool import (
InvalidMcpUrlError,
_validate_remote_mcp_url,
)
class TestValidUrlsAccepted:
"""Every valid http(s) URL must pass through untouched (stripped of whitespace)."""
@pytest.mark.parametrize(
"url",
[
"http://localhost:3000/mcp",
"https://example.com/mcp",
"https://context7.liam.com/mcp",
"http://127.0.0.1:8080",
"https://api.example.com:443/v1/mcp?session=abc",
"http://[::1]:9000/mcp", # IPv6
"https://host.example.com", # no port, no path
],
)
def test_accepts_valid_http_url(self, url):
assert _validate_remote_mcp_url("test", url) == url
def test_strips_surrounding_whitespace(self):
assert (
_validate_remote_mcp_url("test", " https://example.com/mcp ")
== "https://example.com/mcp"
)
class TestInvalidUrlsRejected:
"""Every broken shape must raise ``InvalidMcpUrlError`` with a clear message."""
def test_none_rejected(self):
with pytest.raises(InvalidMcpUrlError, match="context7.*expected a string"):
_validate_remote_mcp_url("context7", None)
def test_dict_rejected(self):
with pytest.raises(InvalidMcpUrlError, match="expected a string, got dict"):
_validate_remote_mcp_url("ctx", {"url": "nested"})
def test_int_rejected(self):
with pytest.raises(InvalidMcpUrlError, match="expected a string, got int"):
_validate_remote_mcp_url("ctx", 8080)
def test_empty_string_rejected(self):
with pytest.raises(InvalidMcpUrlError, match="empty url"):
_validate_remote_mcp_url("ctx", "")
def test_whitespace_only_rejected(self):
with pytest.raises(InvalidMcpUrlError, match="empty url"):
_validate_remote_mcp_url("ctx", " \t\n")
def test_missing_scheme_rejected(self):
# The most common typo — users copy a host from a web page.
with pytest.raises(
InvalidMcpUrlError, match="scheme must be http or https"
):
_validate_remote_mcp_url("ctx", "example.com/mcp")
def test_file_scheme_rejected(self):
with pytest.raises(
InvalidMcpUrlError, match="scheme must be http or https"
):
_validate_remote_mcp_url("ctx", "file:///etc/passwd")
def test_ws_scheme_rejected(self):
# WebSocket is not MCP's remote transport.
with pytest.raises(
InvalidMcpUrlError, match="scheme must be http or https"
):
_validate_remote_mcp_url("ctx", "ws://example.com/mcp")
def test_stdio_scheme_rejected(self):
# stdio servers use the ``command`` key, not ``url``.
with pytest.raises(
InvalidMcpUrlError, match="scheme must be http or https"
):
_validate_remote_mcp_url("ctx", "stdio:///node server.js")
def test_empty_host_rejected(self):
with pytest.raises(InvalidMcpUrlError, match="missing host"):
_validate_remote_mcp_url("ctx", "http:///")
def test_empty_host_with_path_rejected(self):
with pytest.raises(InvalidMcpUrlError, match="missing host"):
_validate_remote_mcp_url("ctx", "https:///path/only")
def test_error_mentions_server_name(self):
# So users can find the bad entry when there are multiple configured.
with pytest.raises(InvalidMcpUrlError, match="my-weird-server"):
_validate_remote_mcp_url("my-weird-server", "not a url at all")
class TestErrorIsValueError:
"""InvalidMcpUrlError must be a ValueError for broad downstream catch blocks."""
def test_is_value_error(self):
try:
_validate_remote_mcp_url("ctx", "garbage")
except ValueError:
pass # expected
else:
pytest.fail("expected ValueError")

View File

@@ -83,6 +83,7 @@ import threading
import time import time
from datetime import datetime from datetime import datetime
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
from urllib.parse import urlparse
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -403,6 +404,67 @@ def _resolve_stdio_command(command: str, env: dict) -> tuple[str, dict]:
return resolved_command, resolved_env return resolved_command, resolved_env
class InvalidMcpUrlError(ValueError):
"""Raised when a remote MCP server's ``url`` cannot be parsed as http(s)://.
Validated once at startup so we fail fast with a clear message instead of
burning through the reconnect-backoff loop on every attempt. (Ported from
anomalyco/opencode#25019.)
"""
def _validate_remote_mcp_url(server_name: str, url: Any) -> str:
"""Return the URL as a string if it's a valid http(s) remote MCP URL.
Raises :class:`InvalidMcpUrlError` otherwise with a message naming the
offending server, so users can spot the bad entry in their config.
Accepts:
- ``http://host`` / ``https://host`` with optional port, path, query
- IPv4, IPv6 (bracketed), DNS hostnames
Rejects:
- Non-string values (``None``, dicts, ints)
- Missing scheme (``example.com/mcp``)
- Non-http(s) schemes (``file://``, ``ws://``, ``stdio:`` — stdio servers
use the ``command`` key, not ``url``)
- Empty host (``http://``, ``https:///path``)
"""
if not isinstance(url, str):
raise InvalidMcpUrlError(
f"Invalid MCP URL for '{server_name}': expected a string, got "
f"{type(url).__name__}"
)
stripped = url.strip()
if not stripped:
raise InvalidMcpUrlError(
f"Invalid MCP URL for '{server_name}': empty url"
)
try:
parsed = urlparse(stripped)
except Exception as exc: # urlparse is very permissive — belt and braces
raise InvalidMcpUrlError(
f"Invalid MCP URL for '{server_name}': {stripped!r} ({exc})"
) from exc
if parsed.scheme.lower() not in ("http", "https"):
raise InvalidMcpUrlError(
f"Invalid MCP URL for '{server_name}': scheme must be http or "
f"https, got {parsed.scheme!r} ({stripped!r})"
)
if not parsed.netloc:
raise InvalidMcpUrlError(
f"Invalid MCP URL for '{server_name}': missing host ({stripped!r})"
)
# ``urlparse`` accepts ``http://:8080`` (empty host, explicit port).
# Reject that — we need a real host.
if not parsed.hostname:
raise InvalidMcpUrlError(
f"Invalid MCP URL for '{server_name}': missing hostname "
f"({stripped!r})"
)
return stripped
def _format_connect_error(exc: BaseException) -> str: def _format_connect_error(exc: BaseException) -> str:
"""Render nested MCP connection errors into an actionable short message.""" """Render nested MCP connection errors into an actionable short message."""
@@ -1287,6 +1349,21 @@ class MCPServerTask:
"this warning.", "this warning.",
self.name, self.name,
) )
# Validate remote URL once, up front. Raising here (rather than
# letting it blow up inside the SDK's httpx layer on every retry)
# means a typo in config.yaml fails fast with a clear error — and
# critically, no reconnect-backoff burn. (Ported from
# anomalyco/opencode#25019.)
if self._is_http():
try:
_validate_remote_mcp_url(self.name, config.get("url"))
except InvalidMcpUrlError as exc:
logger.warning("%s", exc)
self._error = exc
self._ready.set()
return
retries = 0 retries = 0
initial_retries = 0 initial_retries = 0
backoff = 1.0 backoff = 1.0