Compare commits

...

1 Commits

Author SHA1 Message Date
Chris Danis
bc98ea5f03 schema_sanitizer: strip pattern and format from tool schemas for llama.cpp compatibility
llama.cpp's json-schema-to-grammar converter doesn't support standard JSON Schema
pattern/escape syntax (e.g. "\d{4,4}"), causing HTTP 400 errors when MCP servers
expose tool definitions with date-time or other regex patterns.

These fields are purely descriptive hints for LLM prompting — the actual runtime
validation happens on the Python side via the MCP tool's type checking, so dropping
them is safe.

- Add pattern/format stripping to _sanitize_node() in schema_sanitizer.py
- Add 3 tests: string pattern stripped, format stripped, nested anyOf stripped
2026-05-04 21:08:13 -07:00
2 changed files with 59 additions and 0 deletions

View File

@@ -203,3 +203,50 @@ def test_empty_tools_list_returns_empty():
def test_none_tools_returns_none():
    """Passing ``None`` instead of a tools list yields ``None`` back."""
    result = sanitize_tool_schemas(None)
    assert result is None
def test_string_pattern_stripped():
    """A ``pattern`` on a string property is removed while ``type`` is kept.

    The pattern contains regex escapes, which llama.cpp's grammar converter
    chokes on.
    """
    date_schema = {"type": "string", "pattern": "\\d{4,4}-\\d{2,2}-\\d{2,2}"}
    parameters = {
        "type": "object",
        "properties": {
            "date": date_schema,
        },
    }

    sanitized = sanitize_tool_schemas([_tool("t", parameters)])

    date_prop = sanitized[0]["function"]["parameters"]["properties"]["date"]
    assert "pattern" not in date_prop
    assert date_prop["type"] == "string"
def test_string_format_stripped():
    """A ``format`` on a string property is removed while ``type`` is kept.

    Stripping is considered safe because runtime validation catches bad
    values.
    """
    tools = [_tool("t", {
        "type": "object",
        "properties": {
            "ts": {"type": "string", "format": "date-time"},
        },
    })]
    out = sanitize_tool_schemas(tools)
    prop = out[0]["function"]["parameters"]["properties"]["ts"]
    assert "format" not in prop
    # Guard against over-stripping: only ``format`` should disappear, the
    # rest of the property schema must survive (mirrors the pattern tests).
    assert prop["type"] == "string"
def test_nested_pattern_in_anyof_stripped():
    """A ``pattern`` nested inside an ``anyOf`` variant is removed as well."""
    string_variant = {"type": "string", "pattern": "[A-Z]+"}
    integer_variant = {"type": "integer"}
    parameters = {
        "type": "object",
        "properties": {
            "value": {
                "anyOf": [string_variant, integer_variant],
            },
        },
    }

    sanitized = sanitize_tool_schemas([_tool("t", parameters)])

    value_prop = sanitized[0]["function"]["parameters"]["properties"]["value"]
    first_variant = value_prop["anyOf"][0]
    assert "pattern" not in first_variant
    assert first_variant["type"] == "string"

View File

@@ -235,6 +235,18 @@ def _sanitize_node(node: Any, path: str) -> Any:
# literal strings like "path" as bare-string schemas and replace
# them with {"type": "object"} dicts. Pass through unchanged.
out[key] = copy.deepcopy(value) if isinstance(value, (list, dict)) else value
elif key in {"pattern", "format"}:
# Strip JSON Schema ``pattern`` and ``format`` fields entirely.
# llama.cpp's json-schema-to-grammar converter chokes on regex
# escapes in pattern strings (e.g. "\\d{4,4}") — it doesn't
# support standard PCRE/ECMAScript escape sequences. These
# fields are purely descriptive hints for LLM prompting; the
# actual runtime validation happens on the Python side via the
# MCP tool's own type checking, so dropping them is safe.
logger.debug(
"schema_sanitizer[%s]: stripping %r (llama.cpp grammar incompatibility)",
path, key,
)
else:
out[key] = _sanitize_node(value, f"{path}.{key}") if isinstance(value, (dict, list)) else value