Compare commits

...

2 Commits

Author SHA1 Message Date
Teknium
2e48537cf1 chore(release): map xxxigm author email 2026-06-13 14:58:19 -07:00
xxxigm
af1477d812 fix(codex): bound leaked tool-call scan to prefix window 2026-06-13 14:58:19 -07:00
3 changed files with 53 additions and 1 deletions

View File

@@ -70,6 +70,21 @@ _TOOL_CALL_LEAK_PATTERN = re.compile(
r"(?:^|[\s>|])to=functions\.[A-Za-z_][\w.]*",
re.IGNORECASE,
)
# Nominal size, in characters, of the prefix window inspected for a leaked
# tool-call marker.
_TOOL_CALL_LEAK_SCAN_LIMIT = 8192


def _scan_for_leaked_tool_call(text: str) -> bool:
    """Report whether a leaked Harmony tool-call marker sits near the start of *text*.

    Genuine leaked tool-call serializations open with the Harmony marker, so
    only a bounded prefix of the text is examined. This keeps very large,
    successful assistant outputs from paying for a full-length regex scan
    just to prove that no leak is present.

    Returns True when ``_TOOL_CALL_LEAK_PATTERN`` matches inside the prefix
    window, False otherwise.
    """
    # The window is the scan limit plus 64 characters of slack -- presumably
    # so a marker that starts just inside the limit is still matched; confirm
    # with the author if the slack value is ever tuned.
    window_end = _TOOL_CALL_LEAK_SCAN_LIMIT + 64
    # ``endpos`` makes the search behave as if the string ended at
    # ``window_end``, which is equivalent to searching the sliced prefix.
    found = _TOOL_CALL_LEAK_PATTERN.search(text, 0, window_end)
    return found is not None
# ---------------------------------------------------------------------------
@@ -1227,7 +1242,7 @@ def _normalize_codex_response(
# ``function_call`` item. The existing loop already handles message
# append, dedup, and retry budget.
leaked_tool_call_text = False
if final_text and not tool_calls and _TOOL_CALL_LEAK_PATTERN.search(final_text):
if final_text and not tool_calls and _scan_for_leaked_tool_call(final_text):
leaked_tool_call_text = True
logger.warning(
"Codex response contains leaked tool-call text in assistant content "

View File

@@ -992,6 +992,7 @@ AUTHOR_MAP = {
"tuancanhnguyen706@gmail.com": "xxxigm",
"larcombe.n@gmail.com": "NickLarcombe",
"54813621+xxxigm@users.noreply.github.com": "xxxigm",
"xxxigm@users.noreply.github.com": "xxxigm",
"asurla@nvidia.com": "anniesurla",
"kchantharuan@nvidia.com": "nv-kasikritc",
"bbednarski@nvidia.com": "bbednarski9",

View File

@@ -1520,6 +1520,42 @@ def test_normalize_codex_response_detects_leaked_tool_call_text(monkeypatch):
assert assistant_message.tool_calls == []
def test_scan_for_leaked_tool_call_checks_prefix_window_only(monkeypatch):
    """The leak scan finds markers inside the prefix window and ignores later ones."""
    from agent.codex_responses_adapter import (
        _TOOL_CALL_LEAK_SCAN_LIMIT,
        _scan_for_leaked_tool_call,
    )

    marker = "to=functions.terminal {\"command\": \"pwd\"}"

    # A marker at the very start of the text is detected.
    assert _scan_for_leaked_tool_call(marker) is True

    # A separator-prefixed marker that begins just inside the limit is still
    # detected.
    near_limit = "x" * (_TOOL_CALL_LEAK_SCAN_LIMIT - 10) + " " + marker
    assert _scan_for_leaked_tool_call(near_limit) is True

    # A marker well past the window must be ignored. The separating space is
    # required: without it the pattern could never match regardless of the
    # window, so the assertion would pass even if the bound were removed.
    past_window = "x" * (_TOOL_CALL_LEAK_SCAN_LIMIT + 100) + " " + marker
    assert _scan_for_leaked_tool_call(past_window) is False
def test_normalize_codex_response_ignores_late_tool_call_marker_past_scan_window(monkeypatch):
    """A marker located beyond the scan window is passed through as plain content."""
    from agent.codex_responses_adapter import _TOOL_CALL_LEAK_SCAN_LIMIT, _normalize_codex_response

    # Pad far enough that the separator-prefixed marker starts past the window.
    padding = "x" * (_TOOL_CALL_LEAK_SCAN_LIMIT + 100)
    late_marker = padding + " to=functions.terminal {\"command\": \"pwd\"}"

    text_part = SimpleNamespace(type="output_text", text=late_marker)
    message_item = SimpleNamespace(
        type="message",
        status="completed",
        content=[text_part],
    )
    token_usage = SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6)
    response = SimpleNamespace(
        output=[message_item],
        usage=token_usage,
        status="completed",
        model="gpt-5.4",
    )

    assistant_message, finish_reason = _normalize_codex_response(response)

    assert finish_reason == "stop"
    assert assistant_message.content == late_marker
def test_normalize_codex_response_ignores_tool_call_text_when_real_tool_call_present(monkeypatch):
"""If the model emitted BOTH a structured function_call AND some text that
happens to contain `to=functions.*` (unlikely but possible), trust the