From e0fa2cf97259ff8f29da5b3e016bcae987517c8b Mon Sep 17 00:00:00 2001 From: Sanjays2402 <51058514+Sanjays2402@users.noreply.github.com> Date: Wed, 29 Apr 2026 00:50:32 -0700 Subject: [PATCH] fix(tools): isolate get_tool_definitions quiet_mode cache + dedup LCM injection (#17335) Long-lived Gateway processes were sending duplicate tool names to providers that enforce uniqueness: - DeepSeek: 'Tool names must be unique.' - Xiaomi MiMo: 'tools contains duplicate names: lcm_expand' - Moonshot/Kimi: 'function name lcm_grep is duplicated' TUI was unaffected because TUI runs with quiet_mode=False and skips the cache entirely. Root cause (two layered bugs) - model_tools.get_tool_definitions(quiet_mode=True) memoizes its result in _tool_defs_cache. The cache-hit path returned list(cached) (safe), but the FIRST uncached call stored and returned the SAME object. run_agent.py mutates self.tools (memory + LCM context-engine schemas) in-place, so the very first agent init in a Gateway process poisoned the cache, and every subsequent init appended LCM schemas again on top of the already-polluted list. - run_agent.py's context-engine injection (lcm_grep / lcm_describe / lcm_expand) had no dedup, unlike the memory-tools injection right above it which already skips already-present names. Fix (defense in depth, per the issue's suggested fix) - model_tools.get_tool_definitions: on the uncached branch, cache the computed list but return list(result) to the caller. Same pattern as the cache-hit path. - run_agent.py: build _existing_tool_names from self.tools and skip schemas whose names are already present, mirroring the memory-tools block. This also defends against plugin paths that may register the same schemas via ctx.register_tool(). Tests (tests/test_get_tool_definitions_cache_isolation.py) - test_first_uncached_call_returns_fresh_list \u2014 pins the fix; without it, first-call alias caused all the symptoms. - test_cache_hit_returns_fresh_list \u2014 pre-existing behavior stays. - test_caller_mutation_does_not_poison_cache \u2014 simulates run_agent appending lcm_grep / lcm_expand to the returned list and asserts the next call doesn't see them. - test_repeated_caller_mutation_does_not_accumulate \u2014 reproduces the long-lived Gateway accumulation pattern across 5 agent inits. - test_non_quiet_mode_does_not_use_cache \u2014 sanity, explains why TUI was fine. 5/5 pass on the new file; 23/23 still pass on tests/test_model_tools.py. --- model_tools.py | 8 ++ run_agent.py | 19 +++- ...st_get_tool_definitions_cache_isolation.py | 94 +++++++++++++++++++ 3 files changed, 119 insertions(+), 2 deletions(-) create mode 100644 tests/test_get_tool_definitions_cache_isolation.py diff --git a/model_tools.py b/model_tools.py index 25830d2e5a0..b991780a618 100644 --- a/model_tools.py +++ b/model_tools.py @@ -320,7 +320,15 @@ def get_tool_definitions( result = _compute_tool_definitions(enabled_toolsets, disabled_toolsets, quiet_mode) if quiet_mode: + # Cache the freshly-computed list, but hand callers a shallow copy so + # downstream mutations (e.g. run_agent appending memory/LCM tool + # schemas to self.tools) don't poison the cache. Without this, a + # long-lived Gateway process accumulates duplicate tool names across + # agent inits and providers that enforce unique tool names + # (DeepSeek, Xiaomi MiMo, Moonshot Kimi) reject the request with + # HTTP 400. Mirrors the cache-hit path above. (issue #17335) _tool_defs_cache[cache_key] = result + return list(result) return result diff --git a/run_agent.py b/run_agent.py index 9788a5ddc66..e37e1bb596e 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2001,16 +2001,31 @@ class AIAgent: f"model.context_length in config.yaml to override." ) - # Inject context engine tool schemas (e.g. lcm_grep, lcm_describe, lcm_expand) + # Inject context engine tool schemas (e.g. lcm_grep, lcm_describe, lcm_expand). + # Skip names that are already present — the get_tool_definitions() + # quiet_mode cache returned a shared list pre-#17335, so a stray + # mutation here would poison subsequent agent inits in the same + # Gateway process and trip provider-side 'duplicate tool name' + # errors. Even with the cache fix, dedup is the right defense + # against plugin paths that may register the same schemas via + # ctx.register_tool(). Mirrors the memory tools dedup above. self._context_engine_tool_names: set = set() if hasattr(self, "context_compressor") and self.context_compressor and self.tools is not None: + _existing_tool_names = { + t.get("function", {}).get("name") + for t in self.tools + if isinstance(t, dict) + } for _schema in self.context_compressor.get_tool_schemas(): + _tname = _schema.get("name", "") + if _tname and _tname in _existing_tool_names: + continue # already registered via plugin/cache path _wrapped = {"type": "function", "function": _schema} self.tools.append(_wrapped) - _tname = _schema.get("name", "") if _tname: self.valid_tool_names.add(_tname) self._context_engine_tool_names.add(_tname) + _existing_tool_names.add(_tname) # Notify context engine of session start if hasattr(self, "context_compressor") and self.context_compressor: diff --git a/tests/test_get_tool_definitions_cache_isolation.py b/tests/test_get_tool_definitions_cache_isolation.py new file mode 100644 index 00000000000..b92ef9dc454 --- /dev/null +++ b/tests/test_get_tool_definitions_cache_isolation.py @@ -0,0 +1,94 @@ +"""Regression tests for issue #17335. + +The ``quiet_mode=True`` fast path in :func:`model_tools.get_tool_definitions` +memoizes results to avoid re-walking the registry on every Gateway call. The +cached object must NOT be aliased into callers' return values \u2014 long-lived +Gateway processes mutate the returned list (``run_agent`` appends memory and +LCM context-engine tool schemas to ``self.tools``), and a shared list would +poison subsequent agent inits with duplicate tool names. Providers that +enforce uniqueness (DeepSeek, Xiaomi MiMo, Moonshot/Kimi) then reject the +API call with HTTP 400. + +These tests pin: +- the cache-hit path returns a fresh list (existing #17098 behavior) +- the first uncached call also returns a fresh list (the fix) +- every call returns a list that is not the cached one, even after mutation +""" +from __future__ import annotations + +import pytest + +import model_tools + + +@pytest.fixture(autouse=True) +def _clear_cache(): + """Each test starts with an empty quiet_mode cache.""" + model_tools._tool_defs_cache.clear() + yield + model_tools._tool_defs_cache.clear() + + +class TestQuietModeCacheIsolation: + + def test_first_uncached_call_returns_fresh_list(self): + """The first quiet_mode call must not alias the cached object \u2014 + otherwise a caller mutating the returned list mutates the cache.""" + first = model_tools.get_tool_definitions(quiet_mode=True) + assert isinstance(first, list) + # Find the cached value to compare identity. + assert len(model_tools._tool_defs_cache) == 1 + cached = next(iter(model_tools._tool_defs_cache.values())) + assert first is not cached, ( + "issue #17335: first quiet_mode call returned the cached list " + "by reference \u2014 mutations will leak into subsequent calls." + ) + + def test_cache_hit_returns_fresh_list(self): + """The cache-hit path already returned a copy pre-fix; pin it.""" + first = model_tools.get_tool_definitions(quiet_mode=True) + second = model_tools.get_tool_definitions(quiet_mode=True) + assert first is not second + cached = next(iter(model_tools._tool_defs_cache.values())) + assert second is not cached + + def test_caller_mutation_does_not_poison_cache(self): + """Simulate run_agent appending LCM tool schemas to the returned + list. A second call must NOT see those appended entries.""" + first = model_tools.get_tool_definitions(quiet_mode=True) + baseline_len = len(first) + # Caller mutates the returned list (this is what run_agent does + # when it injects memory + context-engine tool schemas). + first.append({"type": "function", "function": {"name": "lcm_grep"}}) + first.append({"type": "function", "function": {"name": "lcm_expand"}}) + + second = model_tools.get_tool_definitions(quiet_mode=True) + # Length must match the original \u2014 cache pollution would make + # second 2 entries longer. + assert len(second) == baseline_len, ( + f"issue #17335: cache was polluted by caller mutation. " + f"first len={baseline_len}, mutated len={len(first)}, " + f"second-call len={len(second)} \u2014 expected {baseline_len}." + ) + names = [t.get("function", {}).get("name") for t in second] + assert "lcm_grep" not in names + assert "lcm_expand" not in names + + def test_repeated_caller_mutation_does_not_accumulate(self): + """The original Gateway symptom: every agent init in a long-lived + process appends LCM schemas, accumulating duplicates over time.""" + baseline = len(model_tools.get_tool_definitions(quiet_mode=True)) + for _ in range(5): + tools = model_tools.get_tool_definitions(quiet_mode=True) + tools.append({"type": "function", "function": {"name": "lcm_grep"}}) + final = model_tools.get_tool_definitions(quiet_mode=True) + assert len(final) == baseline, ( + f"Cache accumulated mutations across {5} agent inits: " + f"baseline={baseline}, final={len(final)}." + ) + + def test_non_quiet_mode_does_not_use_cache(self): + """Sanity: quiet_mode=False (TUI path) skips the cache entirely \u2014 + explains why the bug only hit Gateway.""" + model_tools.get_tool_definitions(quiet_mode=False) + assert len(model_tools._tool_defs_cache) == 0