diff --git a/run_agent.py b/run_agent.py index 5ec62a06a2..49240d70f1 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3683,7 +3683,7 @@ class AIAgent: existing = getattr(self, "_pending_steer", None) self._pending_steer = (existing + "\n" + steer_text) if existing else steer_text return - marker = f"\n\n[USER STEER (injected mid-run, not tool output): {steer_text}]" + marker = f"\n\nUser guidance: {steer_text}" existing_content = messages[target_idx].get("content", "") if not isinstance(existing_content, str): # Anthropic multimodal content blocks — preserve them and append @@ -8979,7 +8979,7 @@ class AIAgent: for _si in range(len(messages) - 1, -1, -1): _sm = messages[_si] if isinstance(_sm, dict) and _sm.get("role") == "tool": - marker = f"\n\n[USER STEER (injected mid-run, not tool output): {_pre_api_steer}]" + marker = f"\n\nUser guidance: {_pre_api_steer}" existing = _sm.get("content", "") if isinstance(existing, str): _sm["content"] = existing + marker diff --git a/tests/run_agent/test_steer.py b/tests/run_agent/test_steer.py index 9a9e4b51cc..d99a0af805 100644 --- a/tests/run_agent/test_steer.py +++ b/tests/run_agent/test_steer.py @@ -85,7 +85,7 @@ class TestSteerInjection: # The LAST tool result is modified; earlier ones are untouched. assert messages[2]["content"] == "ls output A" assert "ls output B" in messages[3]["content"] - assert "[USER STEER" in messages[3]["content"] + assert "User guidance:" in messages[3]["content"] assert "please also check auth.log" in messages[3]["content"] # And pending_steer is consumed. assert agent._pending_steer is None @@ -107,18 +107,19 @@ class TestSteerInjection: # Steer should remain pending (nothing to drain into) assert agent._pending_steer == "steer" - def test_marker_is_unambiguous_about_origin(self): - """The injection marker must make clear the text is from the user - and not tool output — this is the cache-safe way to signal - provenance without violating message-role alternation. + def test_marker_labels_text_as_user_guidance(self): + """The injection marker must label the appended text as user + guidance so the model attributes it to the user rather than + confusing it with tool output. This is the cache-safe way to + signal provenance without violating message-role alternation. """ agent = _bare_agent() agent.steer("stop after next step") messages = [{"role": "tool", "content": "x", "tool_call_id": "1"}] agent._apply_pending_steer_to_tool_results(messages, num_tool_msgs=1) content = messages[-1]["content"] - assert "USER STEER" in content - assert "not tool output" in content.lower() or "injected mid-run" in content.lower() + assert "User guidance:" in content + assert "stop after next step" in content def test_multimodal_content_list_preserved(self): """Anthropic-style list content should be preserved, with the steer @@ -226,9 +227,9 @@ class TestPreApiCallSteerDrain: # Inject into last tool msg (mirrors the new code in run_conversation) for _si in range(len(messages) - 1, -1, -1): if messages[_si].get("role") == "tool": - messages[_si]["content"] += f"\n\n[USER STEER (injected mid-run, not tool output): {_pre_api_steer}]" + messages[_si]["content"] += f"\n\nUser guidance: {_pre_api_steer}" break - assert "[USER STEER" in messages[-1]["content"] + assert "User guidance:" in messages[-1]["content"] assert "focus on error handling" in messages[-1]["content"] assert agent._pending_steer is None @@ -270,7 +271,7 @@ class TestPreApiCallSteerDrain: assert _pre_api_steer is not None for _si in range(len(messages) - 1, -1, -1): if messages[_si].get("role") == "tool": - messages[_si]["content"] += f"\n\n[USER STEER (injected mid-run, not tool output): {_pre_api_steer}]" + messages[_si]["content"] += f"\n\nUser guidance: {_pre_api_steer}" break assert "change approach" in messages[2]["content"]