mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 23:11:37 +08:00
Compare commits
1 Commits
skill/gith
...
feat/fix-p
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cc66b666e5 |
21
run_agent.py
21
run_agent.py
@@ -6649,8 +6649,8 @@ class AIAgent:
|
|||||||
# Plugin hook: pre_llm_call
|
# Plugin hook: pre_llm_call
|
||||||
# Fired once per turn before the tool-calling loop. Plugins can
|
# Fired once per turn before the tool-calling loop. Plugins can
|
||||||
# return a dict with a ``context`` key whose value is a string
|
# return a dict with a ``context`` key whose value is a string
|
||||||
# that will be appended to the ephemeral system prompt for every
|
# that will be injected at request time for every API call in
|
||||||
# API call in this turn (not persisted to session DB or cache).
|
# this turn (not persisted to session DB or cached prefix).
|
||||||
_plugin_turn_context = ""
|
_plugin_turn_context = ""
|
||||||
try:
|
try:
|
||||||
from hermes_cli.plugins import invoke_hook as _invoke_hook
|
from hermes_cli.plugins import invoke_hook as _invoke_hook
|
||||||
@@ -6796,8 +6796,11 @@ class AIAgent:
|
|||||||
effective_system = active_system_prompt or ""
|
effective_system = active_system_prompt or ""
|
||||||
if self.ephemeral_system_prompt:
|
if self.ephemeral_system_prompt:
|
||||||
effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
|
effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
|
||||||
# Plugin context from pre_llm_call hooks — ephemeral, not cached.
|
# Plugin context from pre_llm_call hooks.
|
||||||
if _plugin_turn_context:
|
# For non-cached providers/requests we can append directly.
|
||||||
|
# For Anthropic prompt-cached requests we inject it later as an
|
||||||
|
# uncached system suffix block so the cache key stays stable.
|
||||||
|
if _plugin_turn_context and not self._use_prompt_caching:
|
||||||
effective_system = (effective_system + "\n\n" + _plugin_turn_context).strip()
|
effective_system = (effective_system + "\n\n" + _plugin_turn_context).strip()
|
||||||
if effective_system:
|
if effective_system:
|
||||||
api_messages = [{"role": "system", "content": effective_system}] + api_messages
|
api_messages = [{"role": "system", "content": effective_system}] + api_messages
|
||||||
@@ -6816,6 +6819,16 @@ class AIAgent:
|
|||||||
if self._use_prompt_caching:
|
if self._use_prompt_caching:
|
||||||
api_messages = apply_anthropic_cache_control(api_messages, cache_ttl=self._cache_ttl, native_anthropic=(self.api_mode == 'anthropic_messages'))
|
api_messages = apply_anthropic_cache_control(api_messages, cache_ttl=self._cache_ttl, native_anthropic=(self.api_mode == 'anthropic_messages'))
|
||||||
|
|
||||||
|
# Append plugin context AFTER cache markers so the system-level
|
||||||
|
# cache key stays stable even when plugin output varies per turn.
|
||||||
|
if _plugin_turn_context and api_messages and api_messages[0].get("role") == "system":
|
||||||
|
_sys = api_messages[0].get("content", "")
|
||||||
|
_blocks = list(_sys) if isinstance(_sys, list) else [{"type": "text", "text": _sys}] if isinstance(_sys, str) else []
|
||||||
|
_blocks.append({"type": "text", "text": _plugin_turn_context})
|
||||||
|
api_messages[0]["content"] = _blocks
|
||||||
|
elif _plugin_turn_context:
|
||||||
|
api_messages.insert(0, {"role": "system", "content": _plugin_turn_context})
|
||||||
|
|
||||||
# Safety net: strip orphaned tool results / add stubs for missing
|
# Safety net: strip orphaned tool results / add stubs for missing
|
||||||
# results before sending to the API. Runs unconditionally — not
|
# results before sending to the API. Runs unconditionally — not
|
||||||
# gated on context_compressor — so orphans from session loading or
|
# gated on context_compressor — so orphans from session loading or
|
||||||
|
|||||||
@@ -1573,6 +1573,40 @@ class TestRunConversation:
|
|||||||
assert "Local/custom backend returned reasoning-only output" in result["error"]
|
assert "Local/custom backend returned reasoning-only output" in result["error"]
|
||||||
assert "wrong /v1 endpoint" in result["error"]
|
assert "wrong /v1 endpoint" in result["error"]
|
||||||
|
|
||||||
|
def test_plugin_context_is_uncached_system_suffix_when_prompt_caching_enabled(self, agent):
|
||||||
|
self._setup_agent(agent)
|
||||||
|
agent._use_prompt_caching = True
|
||||||
|
|
||||||
|
captured = {}
|
||||||
|
|
||||||
|
def _fake_api_call(api_kwargs):
|
||||||
|
captured["kwargs"] = api_kwargs
|
||||||
|
return _mock_response(content="ok", finish_reason="stop")
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch(
|
||||||
|
"hermes_cli.plugins.invoke_hook",
|
||||||
|
return_value=[{"context": "plugin-turn-context"}],
|
||||||
|
),
|
||||||
|
patch.object(agent, "_interruptible_api_call", side_effect=_fake_api_call),
|
||||||
|
patch.object(agent, "_persist_session"),
|
||||||
|
patch.object(agent, "_save_trajectory"),
|
||||||
|
patch.object(agent, "_cleanup_task_resources"),
|
||||||
|
):
|
||||||
|
result = agent.run_conversation("hello")
|
||||||
|
|
||||||
|
assert result["completed"] is True
|
||||||
|
assert result["final_response"] == "ok"
|
||||||
|
messages = captured["kwargs"]["messages"]
|
||||||
|
assert messages[0]["role"] == "system"
|
||||||
|
|
||||||
|
system_blocks = messages[0]["content"]
|
||||||
|
assert isinstance(system_blocks, list)
|
||||||
|
assert system_blocks[0]["text"] == "You are helpful."
|
||||||
|
assert system_blocks[0]["cache_control"]["type"] == "ephemeral"
|
||||||
|
assert system_blocks[-1]["text"] == "plugin-turn-context"
|
||||||
|
assert "cache_control" not in system_blocks[-1]
|
||||||
|
|
||||||
def test_nous_401_refreshes_after_remint_and_retries(self, agent):
|
def test_nous_401_refreshes_after_remint_and_retries(self, agent):
|
||||||
self._setup_agent(agent)
|
self._setup_agent(agent)
|
||||||
agent.provider = "nous"
|
agent.provider = "nous"
|
||||||
|
|||||||
Reference in New Issue
Block a user