mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-02 08:47:26 +08:00
Port from anomalyco/opencode#24730: Moonshot's JSON Schema validator rejects two shapes that the rest of the JSON Schema ecosystem accepts: 1. $ref nodes with sibling keywords. Moonshot expands the reference before validation and then rejects the node if keys like `description`, `type`, or `default` appear alongside $ref. MCP-sourced tool schemas commonly put a `description` on $ref-typed properties so the model sees the field hint — which worked on every provider except Moonshot. 2. Tuple-style `items` arrays (positional element schemas). Moonshot's engine requires ONE schema applied to every array element. Common in tool schemas generated from Go/Protobuf that model fixed-length arrays as `[{type:number}, {type:number}]`. Repairs applied in `agent/moonshot_schema.py`: - Rule 3: when a node has `$ref`, return `{"$ref": <value>}` only (strip every sibling). The referenced definition still carries its own description on the target node, which Moonshot accepts. - Rule 4: when `items` is a list, collapse to the first element schema (falling back to `{}` which is then filled by the generic missing-type rule). Preserves `minItems` / `maxItems` / other siblings. Tests: 10 new cases across TestRefSiblingStripping + TestTupleItems, plus the existing TestMissingTypeFilled::test_ref_node_is_not_given_synthetic_type still passes (it asserted plain $ref passes through; now it passes through as exactly `{"$ref": "..."}` which is strictly compatible). All 35 tests in test_moonshot_schema.py pass.
414 lines
15 KiB
Python
414 lines
15 KiB
Python
"""Tests for Moonshot/Kimi flavored-JSON-Schema sanitizer.
|
|
|
|
Moonshot's tool-parameter validator rejects several shapes that the rest of
|
|
the JSON Schema ecosystem accepts:
|
|
|
|
1. Properties without ``type`` — Moonshot requires ``type`` on every node.
|
|
2. ``type`` at the parent of ``anyOf`` — Moonshot requires it only inside
|
|
``anyOf`` children.
|
|
3. ``$ref`` with sibling keywords — Moonshot expands the ref first and then
|
|
rejects ``description``/``type`` siblings on the same node.
|
|
(Ported from anomalyco/opencode#24730.)
|
|
4. Tuple-style ``items`` arrays — Moonshot requires a single item schema,
|
|
not positional ones. (Ported from anomalyco/opencode#24730.)
|
|
|
|
These tests cover the repairs applied by ``agent/moonshot_schema.py``.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from agent.moonshot_schema import (
|
|
is_moonshot_model,
|
|
sanitize_moonshot_tool_parameters,
|
|
sanitize_moonshot_tools,
|
|
)
|
|
|
|
|
|
class TestMoonshotModelDetection:
    """Checks which model identifiers ``is_moonshot_model()`` accepts.

    The positive set mixes bare ids, vendor-prefixed ids, aggregator-prefixed
    ids and an upper-case spelling; the negative set holds other vendors'
    ids plus empty / missing values.
    """

    # Identifiers the detector is expected to report as Moonshot/Kimi.
    _MOONSHOT_IDS = (
        "kimi-k2.6",
        "kimi-k2-thinking",
        "moonshotai/Kimi-K2.6",
        "moonshotai/kimi-k2.6",
        "nous/moonshotai/kimi-k2.6",
        "openrouter/moonshotai/kimi-k2-thinking",
        "MOONSHOTAI/KIMI-K2.6",
    )

    # Identifiers (and empty / None values) the detector must reject.
    _OTHER_IDS = (
        "",
        None,
        "anthropic/claude-sonnet-4.6",
        "openai/gpt-5.4",
        "google/gemini-3-flash-preview",
        "deepseek-chat",
    )

    @pytest.mark.parametrize("model", _MOONSHOT_IDS)
    def test_positive_matches(self, model):
        """Each Moonshot-style id yields exactly ``True``."""
        verdict = is_moonshot_model(model)
        assert verdict is True

    @pytest.mark.parametrize("model", _OTHER_IDS)
    def test_negative_matches(self, model):
        """Each non-Moonshot id (or empty value) yields exactly ``False``."""
        verdict = is_moonshot_model(model)
        assert verdict is False
|
|
|
|
|
|
class TestMissingTypeFilled:
    """Rule 1: schema nodes lacking ``type`` come back with one filled in."""

    def test_property_without_type_gets_string(self):
        """A property with only a description is typed as ``string``."""
        untyped = {"description": "a bare property"}
        schema = {"type": "object", "properties": {"query": untyped}}

        repaired = sanitize_moonshot_tool_parameters(schema)

        assert repaired["properties"]["query"]["type"] == "string"

    def test_property_with_enum_infers_type_from_first_value(self):
        """A boolean-valued ``enum`` without ``type`` is typed as ``boolean``."""
        schema = {
            "type": "object",
            "properties": {"flag": {"enum": [True, False]}},
        }

        flag = sanitize_moonshot_tool_parameters(schema)["properties"]["flag"]

        assert flag["type"] == "boolean"

    def test_nested_properties_are_repaired(self):
        """The repair reaches properties nested inside an object property."""
        inner_object = {
            "type": "object",
            "properties": {
                "field": {"description": "no type"},
            },
        }
        schema = {"type": "object", "properties": {"filter": inner_object}}

        repaired = sanitize_moonshot_tool_parameters(schema)

        field = repaired["properties"]["filter"]["properties"]["field"]
        assert field["type"] == "string"

    def test_array_items_without_type_get_repaired(self):
        """An untyped ``items`` schema on an array is typed as ``string``."""
        tags = {"type": "array", "items": {"description": "tag entry"}}
        schema = {"type": "object", "properties": {"tags": tags}}

        repaired = sanitize_moonshot_tool_parameters(schema)

        item_schema = repaired["properties"]["tags"]["items"]
        assert item_schema["type"] == "string"

    def test_ref_node_is_not_given_synthetic_type(self):
        """A ``$ref`` node keeps its reference and gains no ``type`` key."""
        schema = {
            "type": "object",
            "properties": {"payload": {"$ref": "#/$defs/Payload"}},
            "$defs": {"Payload": {"type": "object", "properties": {}}},
        }

        payload = sanitize_moonshot_tool_parameters(schema)["properties"]["payload"]

        assert "type" not in payload
        assert payload["$ref"] == "#/$defs/Payload"
|
|
|
|
|
|
class TestAnyOfParentType:
    """Rule 2: a node holding ``anyOf`` must not also carry ``type``."""

    def test_parent_type_stripped_when_anyof_present(self):
        """``type`` beside ``anyOf`` is removed while ``anyOf`` is kept."""
        nullable_string = {
            "type": "string",
            "anyOf": [
                {"type": "string"},
                {"type": "null"},
            ],
        }
        schema = {
            "type": "object",
            "properties": {"from_format": nullable_string},
        }

        repaired = sanitize_moonshot_tool_parameters(schema)

        node = repaired["properties"]["from_format"]
        assert "type" not in node
        assert "anyOf" in node

    def test_anyof_children_missing_type_get_filled(self):
        """An ``anyOf`` branch without ``type`` receives one."""
        branches = [
            {"type": "string"},
            {"description": "A typeless option"},
        ]
        schema = {
            "type": "object",
            "properties": {"value": {"anyOf": branches}},
        }

        repaired = sanitize_moonshot_tool_parameters(schema)

        repaired_branches = repaired["properties"]["value"]["anyOf"]
        assert repaired_branches[0]["type"] == "string"
        assert "type" in repaired_branches[1]
|
|
|
|
|
|
class TestRefSiblingStripping:
    """Rule 3: a ``$ref`` node is reduced to the reference alone.

    Ported from anomalyco/opencode#24730. The motivating failure was MCP
    tool schemas that attach a ``description`` to a ``$ref`` property as a
    hint for the model. Only keywords sitting next to ``$ref`` are removed;
    the referenced definition keeps its own description.
    """

    def test_description_sibling_stripped_from_ref(self):
        """A ``description`` beside ``$ref`` is dropped; the target's stays."""
        ref_with_hint = {
            "$ref": "#/$defs/VariantOptions",
            "description": "Required. The variant options for generation.",
        }
        target_definition = {
            "type": "object",
            "properties": {},
            "description": "Configuration options.",
        }
        schema = {
            "type": "object",
            "properties": {"variantOptions": ref_with_hint},
            "$defs": {"VariantOptions": target_definition},
        }

        repaired = sanitize_moonshot_tool_parameters(schema)

        # The node carrying the reference is left with the reference only.
        assert repaired["properties"]["variantOptions"] == {"$ref": "#/$defs/VariantOptions"}
        # The definition being pointed at is not stripped of its description.
        assert repaired["$defs"]["VariantOptions"]["description"] == "Configuration options."

    def test_multiple_siblings_all_stripped(self):
        """Every keyword beside ``$ref`` is removed, not just ``description``."""
        crowded_ref = {
            "$ref": "#/$defs/T",
            "type": "object",
            "description": "x",
            "default": {},
            "title": "P",
        }
        schema = {
            "type": "object",
            "properties": {"p": crowded_ref},
            "$defs": {"T": {"type": "object"}},
        }

        repaired = sanitize_moonshot_tool_parameters(schema)

        assert repaired["properties"]["p"] == {"$ref": "#/$defs/T"}

    def test_ref_without_siblings_unchanged(self):
        """A bare ``$ref`` node comes back equal to what went in."""
        schema = {
            "type": "object",
            "properties": {"p": {"$ref": "#/$defs/T"}},
            "$defs": {"T": {"type": "object"}},
        }

        repaired = sanitize_moonshot_tool_parameters(schema)

        assert repaired["properties"]["p"] == {"$ref": "#/$defs/T"}

    def test_ref_inside_anyof_children(self):
        """Sibling stripping also applies to ``$ref`` nodes under ``anyOf``."""
        branches = [
            {"$ref": "#/$defs/A", "description": "variant A"},
            {"type": "null"},
        ]
        schema = {
            "type": "object",
            "properties": {"v": {"anyOf": branches}},
            "$defs": {"A": {"type": "object"}},
        }

        repaired = sanitize_moonshot_tool_parameters(schema)

        repaired_branches = repaired["properties"]["v"]["anyOf"]
        assert repaired_branches[0] == {"$ref": "#/$defs/A"}
        assert repaired_branches[1] == {"type": "null"}
|
|
|
|
|
|
class TestTupleItems:
    """Rule 4: tuple-style ``items`` arrays collapse to a single schema.

    Ported from anomalyco/opencode#24730. Moonshot's schema engine requires
    ``items`` to be ONE schema object applied to every array element; tuple-
    style positional item schemas are rejected. The sanitizer is expected to
    keep the first element's schema and drop the rest.
    """

    def test_tuple_items_collapsed_to_first(self):
        """A two-element tuple becomes its first schema; siblings survive."""
        params = {
            "type": "object",
            "properties": {
                "renderedSize": {
                    "type": "array",
                    "items": [{"type": "number"}, {"type": "number"}],
                    "minItems": 2,
                    "maxItems": 2,
                },
            },
        }
        out = sanitize_moonshot_tool_parameters(params)
        rendered_size = out["properties"]["renderedSize"]
        assert rendered_size["items"] == {"type": "number"}
        # Sibling constraints are preserved — only the tuple shape is
        # repaired. Check both bounds, since the input supplies both.
        assert rendered_size["minItems"] == 2
        assert rendered_size["maxItems"] == 2

    def test_empty_tuple_items_becomes_empty_schema(self):
        """An empty tuple ends up as a dict schema that carries a ``type``."""
        # An empty tuple has no first element to keep, so the collapsed
        # schema starts out empty. Moonshot requires ``type`` on every
        # schema node, so the result must still come back with one filled
        # in; the exact synthetic type is deliberately not pinned here.
        params = {
            "type": "object",
            "properties": {
                "things": {"type": "array", "items": []},
            },
        }
        out = sanitize_moonshot_tool_parameters(params)
        items = out["properties"]["things"]["items"]
        # Must be a dict and must carry a ``type`` (the whole point of Rule 1).
        assert isinstance(items, dict)
        assert items.get("type")

    def test_tuple_items_first_element_is_repaired(self):
        """The surviving first element also gets the missing-type repair."""
        # The first element itself has a missing type — it should be filled.
        params = {
            "type": "object",
            "properties": {
                "pair": {
                    "type": "array",
                    "items": [{"description": "first"}, {"description": "second"}],
                },
            },
        }
        out = sanitize_moonshot_tool_parameters(params)
        # Repaired to a single schema with a synthetic type.
        assert out["properties"]["pair"]["items"] == {
            "description": "first",
            "type": "string",
        }

    def test_single_schema_items_unchanged(self):
        """An ``items`` value that is already one schema is left as it was."""
        params = {
            "type": "object",
            "properties": {
                "tags": {"type": "array", "items": {"type": "string"}},
            },
        }
        out = sanitize_moonshot_tool_parameters(params)
        assert out["properties"]["tags"]["items"] == {"type": "string"}
|
|
|
|
|
|
class TestTopLevelGuarantees:
    """The returned top-level schema is always a well-formed object."""

    def test_non_dict_input_returns_empty_object(self):
        """``None``, a string and a list all yield the empty object schema."""
        assert sanitize_moonshot_tool_parameters(None) == {"type": "object", "properties": {}}
        assert sanitize_moonshot_tool_parameters("garbage") == {"type": "object", "properties": {}}
        assert sanitize_moonshot_tool_parameters([]) == {"type": "object", "properties": {}}

    def test_non_object_top_level_coerced(self):
        """A non-object top-level ``type`` is replaced by ``object``."""
        params = {"type": "string"}
        out = sanitize_moonshot_tool_parameters(params)
        assert out["type"] == "object"
        assert "properties" in out

    def test_does_not_mutate_input(self):
        """Sanitizing must leave the caller's schema exactly as it was."""
        params = {
            "type": "object",
            "properties": {"q": {"description": "no type"}},
        }
        # Independent copy taken before the call; the nested property dict is
        # copied too so a mutation of ``params`` cannot leak into it.
        snapshot = {
            "type": params["type"],
            "properties": {"q": dict(params["properties"]["q"])},
        }
        sanitize_moonshot_tool_parameters(params)
        assert params["type"] == snapshot["type"]
        assert "type" not in params["properties"]["q"]
        # Whole-structure comparison: catches any added, removed or altered
        # key, not only a synthetic ``type`` on ``q``.
        assert params == snapshot
|
|
|
|
|
|
class TestToolListSanitizer:
    """Exercises ``sanitize_moonshot_tools()`` on OpenAI-format tool lists."""

    def test_applies_per_tool(self):
        """Each tool's ``parameters`` is sanitized independently."""
        search_tool = {
            "type": "function",
            "function": {
                "name": "search",
                "description": "Search",
                "parameters": {
                    "type": "object",
                    "properties": {"q": {"description": "query"}},
                },
            },
        }
        noop_tool = {
            "type": "function",
            "function": {
                "name": "noop",
                "description": "Does nothing",
                "parameters": {"type": "object", "properties": {}},
            },
        }

        sanitized = sanitize_moonshot_tools([search_tool, noop_tool])

        # First tool had an untyped property that must now be typed.
        assert sanitized[0]["function"]["parameters"]["properties"]["q"]["type"] == "string"
        # Second tool needed no repair, so its parameters compare equal.
        assert sanitized[1]["function"]["parameters"] == {"type": "object", "properties": {}}

    def test_empty_list_is_passthrough(self):
        """An empty list stays empty and ``None`` stays ``None``."""
        from_empty = sanitize_moonshot_tools([])
        assert from_empty == []

        from_none = sanitize_moonshot_tools(None)
        assert from_none is None

    def test_skips_malformed_entries(self):
        """Entries lacking a ``function`` dict come back equal to the input."""
        malformed = [{"type": "function"}, {"not": "a tool"}]

        result = sanitize_moonshot_tools(malformed)

        assert result == malformed
|
|
|
|
|
|
class TestRealWorldMCPShape:
    """End-to-end check on an MCP-style schema needing several repairs."""

    def test_combined_rewrites(self):
        """Three repairs in one schema, with ``required`` left untouched."""
        # One property per repair: an untyped property, an ``anyOf`` node
        # that also declares ``type``, and an array whose ``items`` is
        # untyped.
        untyped_query = {"description": "search text"}
        typed_anyof_filter = {
            "type": "string",
            "anyOf": [
                {"type": "string"},
                {"type": "null"},
            ],
        }
        untyped_item_tags = {
            "type": "array",
            "items": {"description": "tag"},
        }
        schema = {
            "type": "object",
            "properties": {
                "query": untyped_query,
                "filter": typed_anyof_filter,
                "tags": untyped_item_tags,
            },
            "required": ["query"],
        }

        repaired = sanitize_moonshot_tool_parameters(schema)

        assert repaired["properties"]["query"]["type"] == "string"
        assert "type" not in repaired["properties"]["filter"]
        assert repaired["properties"]["filter"]["anyOf"][0]["type"] == "string"
        assert repaired["properties"]["tags"]["items"]["type"] == "string"
        assert repaired["required"] == ["query"]
|