feat: GPT tool-use steering + strip budget warnings from history

Two changes to improve tool reliability, especially for OpenAI GPT models: 1. GPT tool-use enforcement prompt: Adds GPT_TOOL_USE_GUIDANCE to the system prompt when the model name contains 'gpt' and tools are loaded. This addresses a known behavioral pattern where GPT models describe intended actions ('I will run the tests') instead of actually making tool calls. Inspired by similar steering in OpenCode (beast.txt) and Cline (GPT-5.1 variant). 2. Budget warning history stripping: Budget pressure warnings injected by _get_budget_warning() into tool results are now stripped when conversation history is replayed via run_conversation(). Previously, these turn-scoped signals persisted across turns, causing models to avoid tool calls in all subsequent messages after any turn that hit the 70-90% iteration threshold.
2026-06-13 13:49:15 +08:00 · 2026-03-27 21:02:23 -07:00
3 changed files with 155 additions and 1 deletions
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -169,6 +169,21 @@ SKILLS_GUIDANCE = (
    "Skills that aren't maintained become liabilities."
 )

+GPT_TOOL_USE_GUIDANCE = (  # appended to the system prompt for "gpt" models that have tools
+    "# Tool-use enforcement\n"
+    "You MUST use your tools to take action — do not describe what you would do "
+    "or plan to do without actually doing it. When you say you will perform an "
+    "action (e.g. 'I will run the tests', 'Let me check the file', 'I will create "
+    "the project'), you MUST immediately make the corresponding tool call in the same "
+    "response. Never end your turn with a promise of future action — execute it now.\n"
+    "Keep working until the task is actually complete. Do not stop with a summary of "
+    "what you plan to do next time. If you have tools available that can accomplish "
+    "the task, use them instead of telling the user what you would do.\n"
+    "Every response should either (a) contain tool calls that make progress, or "
+    "(b) deliver a final result to the user. Responses that only describe intentions "
+    "without acting are not acceptable."
+)
+
 PLATFORM_HINTS = {
    "whatsapp": (
        "You are on a text messaging communication platform, WhatsApp. "
--- a/run_agent.py
+++ b/run_agent.py
@@ -88,7 +88,7 @@ from agent.model_metadata import (
 )
 from agent.context_compressor import ContextCompressor
 from agent.prompt_caching import apply_anthropic_cache_control
-from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, load_soul_md
+from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, load_soul_md, GPT_TOOL_USE_GUIDANCE
 from agent.usage_pricing import estimate_usage_cost, normalize_usage
 from agent.display import (
    KawaiiSpinner, build_tool_preview as _build_tool_preview,
@@ -361,6 +361,43 @@ def _inject_honcho_turn_context(content, turn_context: str):
    return f"{text}\n\n{note}"


+# Bracketed "[BUDGET[ WARNING]: Iteration N/M. ...]" notes injected by _get_budget_warning().
+_BUDGET_WARNING_RE = re.compile(
+    r"\[BUDGET(?:\s+WARNING)?:\s+Iteration\s+\d+/\d+\..*?\]",
+    re.DOTALL,  # ".*?" may cross newlines; the match still ends at the first "]"
+)
+
+
+def _strip_budget_warnings_from_history(messages: list) -> None:
+    """Remove budget pressure warnings from tool-result messages in-place.
+
+    Budget warnings are turn-scoped and must not leak into replayed history.
+    They live in tool-result ``content`` as a ``_budget_warning`` JSON key or
+    as appended plain text; content without a warning is left untouched.
+    """
+    for msg in messages:
+        if not isinstance(msg, dict) or msg.get("role") != "tool":
+            continue
+        content = msg.get("content")
+        if not isinstance(content, str):
+            continue
+        if "_budget_warning" not in content and "[BUDGET" not in content:
+            continue  # cheap substring pre-filter: nothing to strip
+        # Try JSON first (the common case: _budget_warning key in a dict)
+        try:
+            parsed = json.loads(content)
+        except (json.JSONDecodeError, TypeError):
+            parsed = None  # not JSON: fall through to the text pattern
+        if isinstance(parsed, dict) and "_budget_warning" in parsed:
+            del parsed["_budget_warning"]
+            msg["content"] = json.dumps(parsed, ensure_ascii=False)
+            continue
+        # Fallback: text pattern.  Only rewrite/trim when it actually matched.
+        cleaned, removed = _BUDGET_WARNING_RE.subn("", content)
+        if removed:
+            msg["content"] = cleaned.strip()
+
+
 class AIAgent:
    """
    AI Agent with tool calling capabilities.
@@ -2454,6 +2491,13 @@ class AIAgent:
        if tool_guidance:
            prompt_parts.append(" ".join(tool_guidance))

+        # GPT-family models benefit from explicit tool-use enforcement.
+        # Without this, GPT models tend to describe intended actions as text
+        # ("I will run the tests") instead of actually making tool calls.
+        # Inject only when the model has tools available.
+        if self.valid_tool_names and "gpt" in (self.model or "").lower():
+            prompt_parts.append(GPT_TOOL_USE_GUIDANCE)
+
        # Honcho CLI awareness: tell Hermes about its own management commands
        # so it can refer the user to them rather than reinventing answers.
        if self._honcho and self._honcho_session_key:
@@ -5811,6 +5855,14 @@ class AIAgent:
        
        # Initialize conversation (copy to avoid mutating the caller's list)
        messages = list(conversation_history) if conversation_history else []
+
+        # Strip budget pressure warnings from previous turns.  These are
+        # turn-scoped signals injected by _get_budget_warning() into tool
+        # result content.  If left in the replayed history, models (especially
+        # GPT-family) interpret them as still-active instructions and avoid
+        # making tool calls in ALL subsequent turns.
+        if messages:
+            _strip_budget_warnings_from_history(messages)
        
        # Hydrate todo store from conversation history (gateway creates a fresh
        # AIAgent per message, so the in-memory store is empty -- we need to
--- a/tests/agent/test_prompt_builder.py
+++ b/tests/agent/test_prompt_builder.py
@@ -18,6 +18,7 @@ from agent.prompt_builder import (
    build_context_files_prompt,
    CONTEXT_FILE_MAX_CHARS,
    DEFAULT_AGENT_IDENTITY,
+    GPT_TOOL_USE_GUIDANCE,
    MEMORY_GUIDANCE,
    SESSION_SEARCH_GUIDANCE,
    PLATFORM_HINTS,
@@ -926,3 +927,89 @@ class TestBuildSkillsSystemPromptConditional:
            available_toolsets=set(),
        )
        assert "nested-null" in result
+
+
+# =========================================================================
+# GPT tool-use guidance
+# =========================================================================
+
+
+class TestGPTToolUseGuidance:  # wording checks for the GPT tool-use steering prompt
+    def test_guidance_mentions_tool_calls(self):
+        assert "tool call" in GPT_TOOL_USE_GUIDANCE.lower()  # names the mechanism
+
+    def test_guidance_forbids_description_only(self):
+        lowered = GPT_TOOL_USE_GUIDANCE.lower()
+        assert all(word in lowered for word in ("describe", "promise"))
+
+    def test_guidance_requires_action(self):
+        assert "MUST" in GPT_TOOL_USE_GUIDANCE  # case-sensitive: upper-case form only
+
+
+# =========================================================================
+# Budget warning history stripping
+# =========================================================================
+
+
+class TestStripBudgetWarningsFromHistory:
+    def test_strips_json_budget_warning_key(self):
+        """A top-level ``_budget_warning`` JSON key is dropped; other keys survive."""
+        import json
+        from run_agent import _strip_budget_warnings_from_history
+        messages = [
+            {"role": "tool", "tool_call_id": "c1", "content": json.dumps({
+                "output": "hello",
+                "exit_code": 0,
+                "_budget_warning": "[BUDGET: Iteration 55/60. 5 iterations left. Start consolidating your work.]",
+            })},
+        ]
+        _strip_budget_warnings_from_history(messages)
+        parsed = json.loads(messages[0]["content"])
+        assert "_budget_warning" not in parsed
+        assert parsed["output"] == "hello"
+        assert parsed["exit_code"] == 0
+
+    def test_strips_text_budget_warning(self):
+        """A bracketed warning appended to plain text is removed and trimmed."""
+        from run_agent import _strip_budget_warnings_from_history
+        messages = [
+            {"role": "tool", "tool_call_id": "c1",
+             "content": "some result\n\n[BUDGET WARNING: Iteration 58/60. Only 2 iteration(s) left. Provide your final response NOW. No more tool calls unless absolutely critical.]"},
+        ]
+        _strip_budget_warnings_from_history(messages)
+        assert messages[0]["content"] == "some result"
+
+    def test_leaves_non_tool_messages_unchanged(self):
+        """Only ``role == "tool"`` messages are ever rewritten."""
+        from run_agent import _strip_budget_warnings_from_history
+        messages = [
+            {"role": "assistant", "content": "[BUDGET WARNING: Iteration 58/60. Only 2 iteration(s) left. Provide your final response NOW. No more tool calls unless absolutely critical.]"},
+            {"role": "user", "content": "hello"},
+        ]
+        original_contents = [m["content"] for m in messages]
+        _strip_budget_warnings_from_history(messages)
+        assert [m["content"] for m in messages] == original_contents
+
+    def test_handles_empty_and_missing_content(self):
+        """Empty content stays empty and a missing ``content`` key is not created."""
+        from run_agent import _strip_budget_warnings_from_history
+        messages = [
+            {"role": "tool", "tool_call_id": "c1", "content": ""},
+            {"role": "tool", "tool_call_id": "c2"},
+        ]
+        _strip_budget_warnings_from_history(messages)
+        assert messages[0]["content"] == "" and "content" not in messages[1]
+
+    def test_strips_caution_variant(self):
+        """The ``[BUDGET: ...]`` caution wording is removed via the JSON key path."""
+        import json
+        from run_agent import _strip_budget_warnings_from_history
+        messages = [
+            {"role": "tool", "tool_call_id": "c1", "content": json.dumps({
+                "output": "ok",
+                "_budget_warning": "[BUDGET: Iteration 42/60. 18 iterations left. Start consolidating your work.]",
+            })},
+        ]
+        _strip_budget_warnings_from_history(messages)
+        parsed = json.loads(messages[0]["content"])
+        assert "_budget_warning" not in parsed