mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-29 15:31:38 +08:00
Compare commits
1 Commits
opencode-p
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
02028a6a9e |
@@ -43,6 +43,7 @@ def _supports_adaptive_thinking(model: str) -> bool:
|
||||
_COMMON_BETAS = [
|
||||
"interleaved-thinking-2025-05-14",
|
||||
"fine-grained-tool-streaming-2025-05-14",
|
||||
"context-management-2025-06-27",
|
||||
]
|
||||
|
||||
# Additional beta headers required for OAuth/subscription auth
|
||||
@@ -513,6 +514,7 @@ def build_anthropic_kwargs(
|
||||
max_tokens: Optional[int],
|
||||
reasoning_config: Optional[Dict[str, Any]],
|
||||
tool_choice: Optional[str] = None,
|
||||
context_editing: Optional[Dict[str, Any]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build kwargs for anthropic.messages.create()."""
|
||||
system, anthropic_messages = convert_messages_to_anthropic(messages)
|
||||
@@ -562,6 +564,60 @@ def build_anthropic_kwargs(
|
||||
kwargs["temperature"] = 1
|
||||
kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096)
|
||||
|
||||
# Anthropic Context Editing API — server-side context management.
|
||||
# Clears old tool use/result pairs and thinking blocks server-side,
|
||||
# AFTER prompt cache lookup but BEFORE token counting and inference.
|
||||
# This preserves prompt cache prefixes while freeing context space.
|
||||
# Passed via extra_body since context_management is a beta parameter.
|
||||
if context_editing and isinstance(context_editing, dict) and context_editing.get("enabled"):
|
||||
from agent.model_metadata import get_model_context_length
|
||||
try:
|
||||
context_length = get_model_context_length(model)
|
||||
except Exception:
|
||||
context_length = 200_000 # Conservative default for Claude
|
||||
|
||||
trigger_tokens = context_editing.get("trigger_tokens") or int(context_length * 0.60)
|
||||
keep_tool_uses = context_editing.get("keep_tool_uses", 5)
|
||||
keep_thinking_turns = context_editing.get("keep_thinking_turns", 2)
|
||||
clear_at_least = context_editing.get("clear_at_least_tokens") or int(context_length * 0.10)
|
||||
exclude_tools = context_editing.get("exclude_tools") or ["memory", "skill_manage", "todo"]
|
||||
clear_tool_inputs = context_editing.get("clear_tool_inputs", False)
|
||||
|
||||
edits = []
|
||||
|
||||
# clear_thinking requires thinking to be enabled — only add when
|
||||
# reasoning is active (i.e. kwargs already has a "thinking" key).
|
||||
if "thinking" in kwargs:
|
||||
edits.append({
|
||||
"type": "clear_thinking_20251015",
|
||||
"keep": {
|
||||
"type": "thinking_turns",
|
||||
"value": keep_thinking_turns,
|
||||
},
|
||||
})
|
||||
|
||||
edits.append({
|
||||
"type": "clear_tool_uses_20250919",
|
||||
"trigger": {
|
||||
"type": "input_tokens",
|
||||
"value": trigger_tokens,
|
||||
},
|
||||
"keep": {
|
||||
"type": "tool_uses",
|
||||
"value": keep_tool_uses,
|
||||
},
|
||||
"clear_at_least": {
|
||||
"type": "input_tokens",
|
||||
"value": clear_at_least,
|
||||
},
|
||||
"exclude_tools": exclude_tools,
|
||||
"clear_tool_inputs": clear_tool_inputs,
|
||||
})
|
||||
|
||||
kwargs["extra_body"] = {
|
||||
"context_management": {"edits": edits}
|
||||
}
|
||||
|
||||
return kwargs
|
||||
|
||||
|
||||
|
||||
@@ -25,6 +25,29 @@ model:
|
||||
# api_key: "your-key-here" # Uncomment to set here instead of .env
|
||||
base_url: "https://openrouter.ai/api/v1"
|
||||
|
||||
# =============================================================================
|
||||
# Anthropic Context Editing (Claude-only, optional)
|
||||
# =============================================================================
|
||||
# Server-side context management for Claude models. Automatically clears old
|
||||
# tool call/result pairs and thinking blocks at the API level, AFTER prompt
|
||||
# cache lookup but BEFORE token counting. This preserves prompt cache prefixes
|
||||
# while freeing context space — something impossible with client-side stripping.
|
||||
#
|
||||
# Only works with the direct Anthropic API (provider: anthropic). Disabled by default.
|
||||
# Anthropic reports ~29% performance improvement with context editing enabled.
|
||||
#
|
||||
# context_editing:
|
||||
# enabled: true # Enable server-side context editing
|
||||
# trigger_tokens: null # Input token threshold to start clearing (null = auto: 60% of context window)
|
||||
# keep_tool_uses: 5 # How many recent tool_use/result pairs to preserve
|
||||
# keep_thinking_turns: 2 # How many recent thinking turns to preserve
|
||||
# exclude_tools: # Tool calls that are NEVER cleared
|
||||
# - memory
|
||||
# - skill_manage
|
||||
# - todo
|
||||
# clear_tool_inputs: false # Also clear tool input params (default: false)
|
||||
# clear_at_least_tokens: null # Minimum tokens to clear per activation (null = auto: 10% of context window)
|
||||
|
||||
# =============================================================================
|
||||
# OpenRouter Provider Routing (only applies when using OpenRouter)
|
||||
# =============================================================================
|
||||
|
||||
13
cli.py
13
cli.py
@@ -178,6 +178,15 @@ def load_cli_config() -> Dict[str, Any]:
|
||||
"threshold": 0.50, # Compress at 50% of model's context limit
|
||||
"summary_model": "google/gemini-3-flash-preview", # Fast/cheap model for summaries
|
||||
},
|
||||
"context_editing": {
|
||||
"enabled": False,
|
||||
"trigger_tokens": None, # None = auto (60% of context window)
|
||||
"keep_tool_uses": 5,
|
||||
"keep_thinking_turns": 2,
|
||||
"exclude_tools": ["memory", "skill_manage", "todo"],
|
||||
"clear_tool_inputs": False,
|
||||
"clear_at_least_tokens": None, # None = auto (10% of context window)
|
||||
},
|
||||
"agent": {
|
||||
"max_turns": 90, # Default max tool-calling iterations (shared with subagents)
|
||||
"verbose": False,
|
||||
@@ -1217,6 +1226,9 @@ class HermesCLI:
|
||||
CLI_CONFIG["agent"].get("reasoning_effort", "")
|
||||
)
|
||||
|
||||
# Context editing config (Anthropic server-side context management)
|
||||
self.context_editing = CLI_CONFIG.get("context_editing") or {}
|
||||
|
||||
# OpenRouter provider routing preferences
|
||||
pr = CLI_CONFIG.get("provider_routing", {}) or {}
|
||||
self._provider_sort = pr.get("sort")
|
||||
@@ -1503,6 +1515,7 @@ class HermesCLI:
|
||||
ephemeral_system_prompt=self.system_prompt if self.system_prompt else None,
|
||||
prefill_messages=self.prefill_messages or None,
|
||||
reasoning_config=self.reasoning_config,
|
||||
context_editing=self.context_editing,
|
||||
providers_allowed=self._providers_only,
|
||||
providers_ignored=self._providers_ignore,
|
||||
providers_order=self._providers_order,
|
||||
|
||||
@@ -228,6 +228,7 @@ class GatewayRunner:
|
||||
self._prefill_messages = self._load_prefill_messages()
|
||||
self._ephemeral_system_prompt = self._load_ephemeral_system_prompt()
|
||||
self._reasoning_config = self._load_reasoning_config()
|
||||
self._context_editing = self._load_context_editing()
|
||||
self._show_reasoning = self._load_show_reasoning()
|
||||
self._provider_routing = self._load_provider_routing()
|
||||
self._fallback_model = self._load_fallback_model()
|
||||
@@ -489,6 +490,23 @@ class GatewayRunner:
|
||||
logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
|
||||
return None
|
||||
|
||||
@staticmethod
def _load_context_editing() -> dict:
    """Load the ``context_editing`` section from ``config.yaml``.

    Reads ``config.yaml`` under ``_hermes_home`` and returns its top-level
    ``context_editing`` mapping. Loading is best-effort: a missing file, a
    missing or empty section, a section that is not a mapping, or any
    read/parse error yields an empty dict instead of raising.

    Returns:
        The ``context_editing`` dict when present and well-formed,
        otherwise an empty dict.
    """
    try:
        import yaml as _y

        cfg_path = _hermes_home / "config.yaml"
        if not cfg_path.exists():
            return {}
        with open(cfg_path, encoding="utf-8") as _f:
            cfg = _y.safe_load(_f) or {}
        # A config file whose top level is not a mapping has no sections.
        section = cfg.get("context_editing") if isinstance(cfg, dict) else None
    except Exception as exc:
        # Best-effort: a broken config must not stop startup, but the
        # failure should be visible instead of silently swallowed.
        logger.warning("Failed to load context_editing config: %s", exc)
        return {}

    if not section:
        return {}
    if not isinstance(section, dict):
        # e.g. ``context_editing: true`` — honor the declared dict return type.
        logger.warning(
            "Ignoring context_editing config: expected a mapping, got %s",
            type(section).__name__,
        )
        return {}
    return section
|
||||
|
||||
@staticmethod
|
||||
def _load_show_reasoning() -> bool:
|
||||
"""Load show_reasoning toggle from config.yaml display section."""
|
||||
@@ -2195,6 +2213,7 @@ class GatewayRunner:
|
||||
verbose_logging=False,
|
||||
enabled_toolsets=enabled_toolsets,
|
||||
reasoning_config=self._reasoning_config,
|
||||
context_editing=getattr(self, "_context_editing", {}),
|
||||
providers_allowed=pr.get("only"),
|
||||
providers_ignored=pr.get("ignore"),
|
||||
providers_order=pr.get("order"),
|
||||
@@ -3360,6 +3379,7 @@ class GatewayRunner:
|
||||
ephemeral_system_prompt=combined_ephemeral or None,
|
||||
prefill_messages=self._prefill_messages or None,
|
||||
reasoning_config=self._reasoning_config,
|
||||
context_editing=getattr(self, "_context_editing", {}),
|
||||
providers_allowed=pr.get("only"),
|
||||
providers_ignored=pr.get("ignore"),
|
||||
providers_order=pr.get("order"),
|
||||
|
||||
@@ -129,6 +129,16 @@ DEFAULT_CONFIG = {
|
||||
"summary_provider": "auto",
|
||||
},
|
||||
|
||||
"context_editing": {
|
||||
"enabled": False,
|
||||
"trigger_tokens": None,
|
||||
"keep_tool_uses": 5,
|
||||
"keep_thinking_turns": 2,
|
||||
"exclude_tools": ["memory", "skill_manage", "todo"],
|
||||
"clear_tool_inputs": False,
|
||||
"clear_at_least_tokens": None,
|
||||
},
|
||||
|
||||
# Auxiliary model config — provider:model for each side task.
|
||||
# Format: provider is the provider name, model is the model slug.
|
||||
# "auto" for provider = auto-detect best available provider.
|
||||
|
||||
@@ -232,6 +232,7 @@ class AIAgent:
|
||||
step_callback: callable = None,
|
||||
max_tokens: int = None,
|
||||
reasoning_config: Dict[str, Any] = None,
|
||||
context_editing: Dict[str, Any] = None,
|
||||
prefill_messages: List[Dict[str, Any]] = None,
|
||||
platform: str = None,
|
||||
skip_context_files: bool = False,
|
||||
@@ -352,6 +353,7 @@ class AIAgent:
|
||||
# Model response configuration
|
||||
self.max_tokens = max_tokens # None = use model default
|
||||
self.reasoning_config = reasoning_config # None = use default (medium for OpenRouter)
|
||||
self.context_editing = context_editing # Anthropic server-side context management
|
||||
self.prefill_messages = prefill_messages or [] # Prefilled conversation turns
|
||||
|
||||
# Anthropic prompt caching: auto-enabled for Claude models via OpenRouter.
|
||||
@@ -2660,6 +2662,7 @@ class AIAgent:
|
||||
tools=self.tools,
|
||||
max_tokens=self.max_tokens,
|
||||
reasoning_config=self.reasoning_config,
|
||||
context_editing=self.context_editing,
|
||||
)
|
||||
|
||||
if self.api_mode == "codex_responses":
|
||||
|
||||
158
tests/test_context_editing.py
Normal file
158
tests/test_context_editing.py
Normal file
@@ -0,0 +1,158 @@
|
||||
"""Tests for Anthropic Context Editing API integration."""
|
||||
|
||||
import pytest
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||
|
||||
|
||||
class TestContextEditing:
    """Checks that context editing settings end up in extra_body["context_management"]."""

    def _simple_messages(self):
        """Return a minimal system + user conversation."""
        system_turn = {"role": "system", "content": "You are helpful."}
        user_turn = {"role": "user", "content": "Hello"}
        return [system_turn, user_turn]

    def _build(self, *, max_tokens=4096, reasoning_config=None, **extra):
        """Call build_anthropic_kwargs with the fixture shared by every test.

        ``extra`` is forwarded untouched, so omitting ``context_editing``
        exercises the function's own default for that parameter.
        """
        return build_anthropic_kwargs(
            model="claude-sonnet-4-20250514",
            messages=self._simple_messages(),
            tools=None,
            max_tokens=max_tokens,
            reasoning_config=reasoning_config,
            **extra,
        )

    def test_disabled_by_default(self):
        """Omitting context_editing adds neither extra_body nor context_management."""
        result = self._build()
        assert "extra_body" not in result
        assert "context_management" not in result

    def test_disabled_when_false(self):
        """enabled=False leaves extra_body out."""
        result = self._build(context_editing={"enabled": False})
        assert "extra_body" not in result

    def test_enabled_adds_context_management(self):
        """enabled=True places context_management under extra_body."""
        result = self._build(context_editing={"enabled": True})
        assert "extra_body" in result
        management = result["extra_body"]["context_management"]
        assert "edits" in management
        # No thinking configured, so the tool-use edit is the only entry.
        assert len(management["edits"]) == 1
        assert management["edits"][0]["type"] == "clear_tool_uses_20250919"

    def test_thinking_edit_only_when_thinking_enabled(self):
        """The clear_thinking edit appears only when reasoning is switched on."""
        # Reasoning off: no clear_thinking entry at all.
        plain = self._build(context_editing={"enabled": True})
        plain_types = [
            edit["type"]
            for edit in plain["extra_body"]["context_management"]["edits"]
        ]
        assert "clear_thinking_20251015" not in plain_types

        # Reasoning on: clear_thinking first, then clear_tool_uses.
        reasoning = self._build(
            max_tokens=16384,
            reasoning_config={"enabled": True, "effort": "medium"},
            context_editing={"enabled": True},
        )
        reasoning_types = [
            edit["type"]
            for edit in reasoning["extra_body"]["context_management"]["edits"]
        ]
        assert reasoning_types == [
            "clear_thinking_20251015",
            "clear_tool_uses_20250919",
        ]

    def test_custom_values(self):
        """User-supplied settings are forwarded verbatim into the edits."""
        settings = {
            "enabled": True,
            "trigger_tokens": 80000,
            "keep_tool_uses": 10,
            "keep_thinking_turns": 3,
            "clear_at_least_tokens": 20000,
            "exclude_tools": ["memory", "web_search"],
            "clear_tool_inputs": True,
        }
        result = self._build(
            max_tokens=16384,
            reasoning_config={"enabled": True, "effort": "medium"},
            context_editing=settings,
        )
        edits = result["extra_body"]["context_management"]["edits"]

        thinking_edit = edits[0]
        assert thinking_edit["keep"]["value"] == 3

        tool_edit = edits[1]
        assert tool_edit["trigger"]["value"] == 80000
        assert tool_edit["keep"]["value"] == 10
        assert tool_edit["clear_at_least"]["value"] == 20000
        assert tool_edit["exclude_tools"] == ["memory", "web_search"]
        assert tool_edit["clear_tool_inputs"] is True

    def test_default_exclude_tools(self):
        """memory, skill_manage and todo are excluded when nothing is configured."""
        result = self._build(context_editing={"enabled": True})
        first_edit = result["extra_body"]["context_management"]["edits"][0]
        excluded = first_edit["exclude_tools"]
        for tool_name in ("memory", "skill_manage", "todo"):
            assert tool_name in excluded

    def test_auto_scales_to_context_window(self):
        """Automatic trigger / clear_at_least values track the context window."""
        result = self._build(context_editing={"enabled": True})
        tool_edit = result["extra_body"]["context_management"]["edits"][0]
        trigger_value = tool_edit["trigger"]["value"]
        minimum_cleared = tool_edit["clear_at_least"]["value"]
        # Expected proportions: trigger ~60%, clear_at_least ~10%.
        assert trigger_value > 50000
        assert minimum_cleared > 5000
        assert trigger_value > minimum_cleared

    def test_empty_dict_does_nothing(self):
        """An empty settings dict behaves like a disabled feature."""
        result = self._build(context_editing={})
        assert "extra_body" not in result
|
||||
Reference in New Issue
Block a user