Compare commits

...

2 Commits

Author SHA1 Message Date
teknium1
29907a8695 chore(release): map islam666 for salvaged PR #41048 2026-06-07 06:36:10 -07:00
islam666
b046b69593 fix(verifier): store file-mutation footer separately from final_response (#40772)
The file-mutation verifier footer was concatenated directly into
final_response, causing TTS to speak the advisory text aloud and the
transform_llm_output plugin hook to see it as part of the model's
response.

Fix: store the footer in agent._file_mutation_verifier_footer and
include it in the result dict under 'file_mutation_verifier_footer'
instead of mutating final_response.

- conversation_loop.py: store footer separately, clear at turn start,
  add to result dict
- cli.py: display footer separately from the response panel; TTS
  already receives clean text since final_response no longer contains it
- gateway/run.py: append footer to response text sent to messaging
  platforms (users on all platforms should still see the advisory)

Fixes #40772
2026-06-07 06:35:59 -07:00
5 changed files with 206 additions and 5 deletions

View File

@@ -777,7 +777,12 @@ def run_conversation(
# present are surfaced in an advisory footer so the model cannot
# over-claim success while the file is actually unchanged on disk.
agent._turn_failed_file_mutations: Dict[str, Dict[str, Any]] = {}
# Clear any verifier footer from a previous turn. The footer is
# stored separately from final_response (see #40772 fix) and must
# not leak across turns.
agent._file_mutation_verifier_footer = None
# Record the execution thread so interrupt()/clear_interrupt() can
# scope the tool-level interrupt signal to THIS agent's thread only.
# Must be set before any thread-scoped interrupt syncing.
@@ -4740,7 +4745,12 @@ def run_conversation(
if _failed and agent._file_mutation_verifier_enabled():
footer = agent._format_file_mutation_failure_footer(_failed)
if footer:
final_response = final_response.rstrip() + "\n\n" + footer
# Store the footer separately instead of mutating
# final_response. This prevents TTS, the
# transform_llm_output plugin hook, and other
# downstream consumers from treating the advisory as
# part of the model's response text. (#40772)
agent._file_mutation_verifier_footer = footer
except Exception as _ver_err:
logger.debug("file-mutation verifier footer failed: %s", _ver_err)
@@ -4856,6 +4866,7 @@ def run_conversation(
break
# Build result with interrupt info if applicable
_verifier_footer = getattr(agent, "_file_mutation_verifier_footer", None) or None
result = {
"final_response": final_response,
"last_reasoning": last_reasoning,
@@ -4884,6 +4895,7 @@ def run_conversation(
"cost_status": agent.session_cost_status,
"cost_source": agent.session_cost_source,
"session_id": agent.session_id,
"file_mutation_verifier_footer": _verifier_footer,
}
if agent._tool_guardrail_halt_decision is not None:
result["guardrail"] = agent._tool_guardrail_halt_decision.to_metadata()

17
cli.py
View File

@@ -12772,6 +12772,14 @@ class HermesCLI:
width=self._scrollback_box_width(),
))
# Display file-mutation verifier footer separately from the
# response panel so it's visible to the user but not included
# in TTS or plugin hooks. (#40772)
if result:
_vf_footer = result.get("file_mutation_verifier_footer")
if _vf_footer:
_cprint(f"\n{_DIM}{_vf_footer}{_RST}")
# Play terminal bell when agent finishes (if enabled).
# Works over SSH — the bell propagates to the user's terminal.
@@ -12791,9 +12799,12 @@ class HermesCLI:
)
# Speak response aloud if voice TTS is enabled
# Skip batch TTS when streaming TTS already handled it
if self._voice_tts and response and not use_streaming_tts:
self._voice_speak_response_async(response)
# Skip batch TTS when streaming TTS already handled it.
# Use clean response text (without verifier footer) so TTS
# doesn't speak internal advisory text aloud. (#40772)
_voice_response = response
if self._voice_tts and _voice_response and not use_streaming_tts:
self._voice_speak_response_async(_voice_response)
# Re-queue the interrupt message (and any that arrived while we were

View File

@@ -9566,6 +9566,14 @@ class GatewayRunner:
response = agent_result.get("final_response") or ""
# Append file-mutation verifier footer to the response text
# sent to messaging platforms. The footer is stored separately
# from final_response (to keep TTS/plugin hooks clean) but
# users on all platforms should still see it. (#40772)
_vf_footer = agent_result.get("file_mutation_verifier_footer") or ""
if _vf_footer:
response = (response + "\n\n" + _vf_footer).strip()
# Convert the agent's internal "(empty)" sentinel into a
# user-friendly message. "(empty)" means the model failed to
# produce visible content after exhausting all retries (nudge,

View File

@@ -58,6 +58,7 @@ AUTHOR_MAP = {
"129007007+HeLLGURD@users.noreply.github.com": "HeLLGURD",
"290859878+synapsesx@users.noreply.github.com": "synapsesx",
"dirtyren@users.noreply.github.com": "dirtyren",
"islam666@users.noreply.github.com": "islam666",
"zhaolei.vc@bytedance.com": "zhaoleibd",
"jeffrobodie@gmail.com": "jeffrobodie-glitch",
"kyssta-exe@users.noreply.github.com": "kyssta-exe",

View File

@@ -0,0 +1,169 @@
"""Regression tests for the file-mutation verifier footer (#40772).
After #40772 the verifier footer is stored separately from final_response
in result['file_mutation_verifier_footer'], not concatenated into
final_response. TTS, transform_llm_output, and other downstream consumers
of final_response should never see the advisory text.
These are lightweight unit tests that verify the storage contract directly
without running the full conversation loop.
"""
from __future__ import annotations
import json
import pytest
from run_agent import (
AIAgent,
_extract_file_mutation_targets,
)
def _bare_agent():
    """Build an ``AIAgent`` instance without running its ``__init__``.

    Only the two attributes the tests in this module touch are populated:
    an empty ``_turn_failed_file_mutations`` mapping and a
    ``_file_mutation_verifier_footer`` of ``None``.
    """
    # object.__new__ allocates the instance but skips AIAgent.__init__.
    stub = object.__new__(AIAgent)
    stub._turn_failed_file_mutations = {}
    stub._file_mutation_verifier_footer = None
    return stub
class TestFooterStorageContract:
    """Storage contract for the file-mutation verifier footer (#40772).

    The footer must be stored in ``agent._file_mutation_verifier_footer``
    (and exposed in the result dict under
    ``'file_mutation_verifier_footer'``) and must NOT be concatenated into
    ``final_response``.

    NOTE(review): these tests re-enact the footer logic on a bare agent
    instead of driving the conversation loop, so they pin the intended
    contract rather than the production code path.
    """

    @staticmethod
    def _store_footer(agent, final_response, interrupted=False):
        """Re-enact the footer step on ``agent`` and return ``final_response``.

        A footer is stored only when the response is non-empty, the turn was
        not interrupted, and at least one failed mutation is recorded on
        ``agent``. The response text is handed back untouched so callers can
        assert that it was not mutated.

        NOTE(review): the real loop also gates on
        ``agent._file_mutation_verifier_enabled()``; that check is not
        re-enacted here.
        """
        if not final_response or interrupted:
            return final_response
        failed = getattr(agent, "_turn_failed_file_mutations", None) or {}
        if failed:
            footer = AIAgent._format_file_mutation_failure_footer(failed)
            if footer:
                agent._file_mutation_verifier_footer = footer
        return final_response

    def test_footer_not_in_final_response(self):
        """A failed patch stores a footer and leaves the response untouched."""
        agent = _bare_agent()
        # Simulate a failed patch during a turn
        agent._turn_failed_file_mutations["/tmp/test.md"] = {
            "tool": "patch",
            "error_preview": "Could not find old_string",
        }

        final_response = self._store_footer(agent, "I tried to patch the file.")

        # final_response must be unchanged
        assert final_response == "I tried to patch the file."
        assert "File-mutation verifier" not in final_response
        # Footer must be stored separately
        footer = agent._file_mutation_verifier_footer
        assert footer is not None
        assert "File-mutation verifier" in footer
        assert "1 file(s) were NOT modified" in footer

    def test_footer_not_visible_to_transform_llm_output(self):
        """Consumers of ``final_response`` never see the advisory text."""
        agent = _bare_agent()
        agent._turn_failed_file_mutations["/tmp/a.md"] = {
            "tool": "patch",
            "error_preview": "old_string not found",
        }
        agent._turn_failed_file_mutations["/tmp/b.md"] = {
            "tool": "write_file",
            "error_preview": "Permission denied",
        }

        final_response = self._store_footer(
            agent, "I updated both files successfully."
        )

        # Simulate what transform_llm_output hook receives
        hook_response_text = final_response
        assert "File-mutation verifier" not in hook_response_text
        assert "NOT modified" not in hook_response_text
        assert "Permission denied" not in hook_response_text
        # Footer is available via the side channel
        assert "2 file(s)" in agent._file_mutation_verifier_footer

    def test_footer_cleared_between_turns(self):
        """A stale footer from a previous turn is reset to ``None``.

        NOTE(review): this only re-enacts the assignment; it does not prove
        that the conversation loop performs the reset.
        """
        agent = _bare_agent()
        # Simulate a previous turn that set a footer
        agent._file_mutation_verifier_footer = (
            "⚠️ File-mutation verifier: 1 file(s) were NOT modified..."
        )
        # Simulate the turn-start reset done in run_conversation()
        agent._file_mutation_verifier_footer = None
        assert agent._file_mutation_verifier_footer is None

    def test_no_footer_when_all_mutations_succeed(self):
        """With no failed mutations recorded, no footer is stored."""
        agent = _bare_agent()
        # No failed mutations
        agent._turn_failed_file_mutations = {}

        final_response = self._store_footer(agent, "All files updated.")

        assert agent._file_mutation_verifier_footer is None
        assert final_response == "All files updated."

    def test_empty_final_response_skips_footer(self):
        """An empty / interrupted response stores no footer."""
        agent = _bare_agent()
        agent._turn_failed_file_mutations["/tmp/x.md"] = {
            "tool": "patch",
            "error_preview": "err",
        }

        # Empty response on an interrupted turn: the guard must short-circuit.
        self._store_footer(agent, "", interrupted=True)

        assert agent._file_mutation_verifier_footer is None

    def test_result_dict_includes_footer(self):
        """The result dict carries the footer for CLI/gateway use."""
        agent = _bare_agent()
        agent._file_mutation_verifier_footer = (
            "⚠️ File-mutation verifier: 1 file(s) were NOT modified..."
        )
        # Simulate the result-building stage: a falsy footer collapses to None.
        _verifier_footer = (
            getattr(agent, "_file_mutation_verifier_footer", None) or None
        )
        result = {
            "final_response": "I tried to patch the file.",
            "file_mutation_verifier_footer": _verifier_footer,
        }
        assert result["final_response"] == "I tried to patch the file."
        assert result["file_mutation_verifier_footer"] is not None
        assert "File-mutation verifier" in result["file_mutation_verifier_footer"]