mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-29 23:41:35 +08:00
Compare commits
2 Commits
fix/plugin
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bd0c3eadd1 | ||
|
|
9a61265824 |
13
cli.py
13
cli.py
@@ -3836,6 +3836,8 @@ class HermesCLI:
|
|||||||
self.console.print(f" Status bar {state}")
|
self.console.print(f" Status bar {state}")
|
||||||
elif canonical == "verbose":
|
elif canonical == "verbose":
|
||||||
self._toggle_verbose()
|
self._toggle_verbose()
|
||||||
|
elif canonical == "yolo":
|
||||||
|
self._toggle_yolo()
|
||||||
elif canonical == "reasoning":
|
elif canonical == "reasoning":
|
||||||
self._handle_reasoning_command(cmd_original)
|
self._handle_reasoning_command(cmd_original)
|
||||||
elif canonical == "compress":
|
elif canonical == "compress":
|
||||||
@@ -4434,6 +4436,17 @@ class HermesCLI:
|
|||||||
}
|
}
|
||||||
_cprint(labels.get(self.tool_progress_mode, ""))
|
_cprint(labels.get(self.tool_progress_mode, ""))
|
||||||
|
|
||||||
|
def _toggle_yolo(self):
|
||||||
|
"""Toggle YOLO mode — skip all dangerous command approval prompts."""
|
||||||
|
import os
|
||||||
|
current = bool(os.environ.get("HERMES_YOLO_MODE"))
|
||||||
|
if current:
|
||||||
|
os.environ.pop("HERMES_YOLO_MODE", None)
|
||||||
|
self.console.print(" ⚠ YOLO mode [bold red]OFF[/] — dangerous commands will require approval.")
|
||||||
|
else:
|
||||||
|
os.environ["HERMES_YOLO_MODE"] = "1"
|
||||||
|
self.console.print(" ⚡ YOLO mode [bold green]ON[/] — all commands auto-approved. Use with caution.")
|
||||||
|
|
||||||
def _handle_reasoning_command(self, cmd: str):
|
def _handle_reasoning_command(self, cmd: str):
|
||||||
"""Handle /reasoning — manage effort level and display toggle.
|
"""Handle /reasoning — manage effort level and display toggle.
|
||||||
|
|
||||||
|
|||||||
@@ -432,6 +432,13 @@ class GatewayRunner:
|
|||||||
self._honcho_managers: Dict[str, Any] = {}
|
self._honcho_managers: Dict[str, Any] = {}
|
||||||
self._honcho_configs: Dict[str, Any] = {}
|
self._honcho_configs: Dict[str, Any] = {}
|
||||||
|
|
||||||
|
# Rate-limit compression warning messages sent to users.
|
||||||
|
# Keyed by chat_id — value is the timestamp of the last warning sent.
|
||||||
|
# Prevents the warning from firing on every message when a session
|
||||||
|
# remains above the threshold after compression.
|
||||||
|
self._compression_warn_sent: Dict[str, float] = {}
|
||||||
|
self._compression_warn_cooldown: int = 3600 # seconds (1 hour)
|
||||||
|
|
||||||
# Ensure tirith security scanner is available (downloads if needed)
|
# Ensure tirith security scanner is available (downloads if needed)
|
||||||
try:
|
try:
|
||||||
from tools.tirith_security import ensure_installed
|
from tools.tirith_security import ensure_installed
|
||||||
@@ -1830,6 +1837,9 @@ class GatewayRunner:
|
|||||||
if canonical == "verbose":
|
if canonical == "verbose":
|
||||||
return await self._handle_verbose_command(event)
|
return await self._handle_verbose_command(event)
|
||||||
|
|
||||||
|
if canonical == "yolo":
|
||||||
|
return await self._handle_yolo_command(event)
|
||||||
|
|
||||||
if canonical == "provider":
|
if canonical == "provider":
|
||||||
return await self._handle_provider_command(event)
|
return await self._handle_provider_command(event)
|
||||||
|
|
||||||
@@ -2344,13 +2354,18 @@ class GatewayRunner:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
# Still too large after compression — warn user
|
# Still too large after compression — warn user
|
||||||
|
# Rate-limited to once per cooldown period per
|
||||||
|
# chat to avoid spamming on every message.
|
||||||
if _new_tokens >= _warn_token_threshold:
|
if _new_tokens >= _warn_token_threshold:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Session hygiene: still ~%s tokens after "
|
"Session hygiene: still ~%s tokens after "
|
||||||
"compression — suggesting /reset",
|
"compression — suggesting /reset",
|
||||||
f"{_new_tokens:,}",
|
f"{_new_tokens:,}",
|
||||||
)
|
)
|
||||||
if _hyg_adapter:
|
_now = time.time()
|
||||||
|
_last_warn = self._compression_warn_sent.get(source.chat_id, 0)
|
||||||
|
if _hyg_adapter and _now - _last_warn >= self._compression_warn_cooldown:
|
||||||
|
self._compression_warn_sent[source.chat_id] = _now
|
||||||
try:
|
try:
|
||||||
await _hyg_adapter.send(
|
await _hyg_adapter.send(
|
||||||
source.chat_id,
|
source.chat_id,
|
||||||
@@ -2372,7 +2387,10 @@ class GatewayRunner:
|
|||||||
if _approx_tokens >= _warn_token_threshold:
|
if _approx_tokens >= _warn_token_threshold:
|
||||||
_hyg_adapter = self.adapters.get(source.platform)
|
_hyg_adapter = self.adapters.get(source.platform)
|
||||||
_hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
|
_hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
|
||||||
if _hyg_adapter:
|
_now = time.time()
|
||||||
|
_last_warn = self._compression_warn_sent.get(source.chat_id, 0)
|
||||||
|
if _hyg_adapter and _now - _last_warn >= self._compression_warn_cooldown:
|
||||||
|
self._compression_warn_sent[source.chat_id] = _now
|
||||||
try:
|
try:
|
||||||
await _hyg_adapter.send(
|
await _hyg_adapter.send(
|
||||||
source.chat_id,
|
source.chat_id,
|
||||||
@@ -3999,6 +4017,16 @@ class GatewayRunner:
|
|||||||
else:
|
else:
|
||||||
return f"🧠 ✓ Reasoning effort set to `{effort}` (this session only)"
|
return f"🧠 ✓ Reasoning effort set to `{effort}` (this session only)"
|
||||||
|
|
||||||
|
async def _handle_yolo_command(self, event: MessageEvent) -> str:
|
||||||
|
"""Handle /yolo — toggle dangerous command approval bypass."""
|
||||||
|
current = bool(os.environ.get("HERMES_YOLO_MODE"))
|
||||||
|
if current:
|
||||||
|
os.environ.pop("HERMES_YOLO_MODE", None)
|
||||||
|
return "⚠️ YOLO mode **OFF** — dangerous commands will require approval."
|
||||||
|
else:
|
||||||
|
os.environ["HERMES_YOLO_MODE"] = "1"
|
||||||
|
return "⚡ YOLO mode **ON** — all commands auto-approved. Use with caution."
|
||||||
|
|
||||||
async def _handle_verbose_command(self, event: MessageEvent) -> str:
|
async def _handle_verbose_command(self, event: MessageEvent) -> str:
|
||||||
"""Handle /verbose command — cycle tool progress display mode.
|
"""Handle /verbose command — cycle tool progress display mode.
|
||||||
|
|
||||||
|
|||||||
@@ -90,6 +90,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
|||||||
CommandDef("verbose", "Cycle tool progress display: off -> new -> all -> verbose",
|
CommandDef("verbose", "Cycle tool progress display: off -> new -> all -> verbose",
|
||||||
"Configuration", cli_only=True,
|
"Configuration", cli_only=True,
|
||||||
gateway_config_gate="display.tool_progress_command"),
|
gateway_config_gate="display.tool_progress_command"),
|
||||||
|
CommandDef("yolo", "Toggle YOLO mode (skip all dangerous command approvals)",
|
||||||
|
"Configuration"),
|
||||||
CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
|
CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
|
||||||
args_hint="[level|show|hide]",
|
args_hint="[level|show|hide]",
|
||||||
subcommands=("none", "low", "minimal", "medium", "high", "xhigh", "show", "hide", "on", "off")),
|
subcommands=("none", "low", "minimal", "medium", "high", "xhigh", "show", "hide", "on", "off")),
|
||||||
|
|||||||
17
run_agent.py
17
run_agent.py
@@ -5204,11 +5204,8 @@ class AIAgent:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e)
|
logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e)
|
||||||
|
|
||||||
# Reset context pressure warning and token estimate — usage drops
|
# Update token estimate after compaction so pressure calculations
|
||||||
# after compaction. Without this, the stale last_prompt_tokens from
|
# use the post-compression count, not the stale pre-compression one.
|
||||||
# the previous API call causes the pressure calculation to stay at
|
|
||||||
# >1000% and spam warnings / re-trigger compression in a loop.
|
|
||||||
self._context_pressure_warned = False
|
|
||||||
_compressed_est = (
|
_compressed_est = (
|
||||||
estimate_tokens_rough(new_system_prompt)
|
estimate_tokens_rough(new_system_prompt)
|
||||||
+ estimate_messages_tokens_rough(compressed)
|
+ estimate_messages_tokens_rough(compressed)
|
||||||
@@ -5216,6 +5213,16 @@ class AIAgent:
|
|||||||
self.context_compressor.last_prompt_tokens = _compressed_est
|
self.context_compressor.last_prompt_tokens = _compressed_est
|
||||||
self.context_compressor.last_completion_tokens = 0
|
self.context_compressor.last_completion_tokens = 0
|
||||||
|
|
||||||
|
# Only reset the pressure warning if compression actually brought
|
||||||
|
# us below the warning level (85% of threshold). When compression
|
||||||
|
# can't reduce enough (e.g. threshold is very low, or system prompt
|
||||||
|
# alone exceeds the warning level), keep the flag set to prevent
|
||||||
|
# spamming the user with repeated warnings every loop iteration.
|
||||||
|
if self.context_compressor.threshold_tokens > 0:
|
||||||
|
_post_progress = _compressed_est / self.context_compressor.threshold_tokens
|
||||||
|
if _post_progress < 0.85:
|
||||||
|
self._context_pressure_warned = False
|
||||||
|
|
||||||
return compressed, new_system_prompt
|
return compressed, new_system_prompt
|
||||||
|
|
||||||
def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
|
def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
|
||||||
|
|||||||
@@ -212,6 +212,49 @@ class TestSessionHygieneWarnThreshold:
|
|||||||
assert post_compress_tokens < warn_threshold
|
assert post_compress_tokens < warn_threshold
|
||||||
|
|
||||||
|
|
||||||
|
class TestCompressionWarnRateLimit:
|
||||||
|
"""Compression warning messages must be rate-limited per chat_id."""
|
||||||
|
|
||||||
|
def _make_runner(self):
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
with patch("gateway.run.load_gateway_config"), \
|
||||||
|
patch("gateway.run.SessionStore"), \
|
||||||
|
patch("gateway.run.DeliveryRouter"):
|
||||||
|
from gateway.run import GatewayRunner
|
||||||
|
runner = GatewayRunner.__new__(GatewayRunner)
|
||||||
|
runner._compression_warn_sent = {}
|
||||||
|
runner._compression_warn_cooldown = 3600
|
||||||
|
return runner
|
||||||
|
|
||||||
|
def test_first_warn_is_sent(self):
|
||||||
|
runner = self._make_runner()
|
||||||
|
now = 1_000_000.0
|
||||||
|
last = runner._compression_warn_sent.get("chat:1", 0)
|
||||||
|
assert now - last >= runner._compression_warn_cooldown
|
||||||
|
|
||||||
|
def test_second_warn_suppressed_within_cooldown(self):
|
||||||
|
runner = self._make_runner()
|
||||||
|
now = 1_000_000.0
|
||||||
|
runner._compression_warn_sent["chat:1"] = now - 60 # 1 minute ago
|
||||||
|
last = runner._compression_warn_sent.get("chat:1", 0)
|
||||||
|
assert now - last < runner._compression_warn_cooldown
|
||||||
|
|
||||||
|
def test_warn_allowed_after_cooldown(self):
|
||||||
|
runner = self._make_runner()
|
||||||
|
now = 1_000_000.0
|
||||||
|
runner._compression_warn_sent["chat:1"] = now - 3601 # just past cooldown
|
||||||
|
last = runner._compression_warn_sent.get("chat:1", 0)
|
||||||
|
assert now - last >= runner._compression_warn_cooldown
|
||||||
|
|
||||||
|
def test_rate_limit_is_per_chat(self):
|
||||||
|
"""Rate-limiting one chat must not suppress warnings for another."""
|
||||||
|
runner = self._make_runner()
|
||||||
|
now = 1_000_000.0
|
||||||
|
runner._compression_warn_sent["chat:1"] = now - 60 # suppressed
|
||||||
|
last_other = runner._compression_warn_sent.get("chat:2", 0)
|
||||||
|
assert now - last_other >= runner._compression_warn_cooldown
|
||||||
|
|
||||||
|
|
||||||
class TestEstimatedTokenThreshold:
|
class TestEstimatedTokenThreshold:
|
||||||
"""Verify that hygiene thresholds are always below the model's context
|
"""Verify that hygiene thresholds are always below the model's context
|
||||||
limit — for both actual and estimated token counts.
|
limit — for both actual and estimated token counts.
|
||||||
|
|||||||
Reference in New Issue
Block a user