mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 23:11:37 +08:00
Compare commits
2 Commits
skill/gith
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bd0c3eadd1 | ||
|
|
9a61265824 |
13
cli.py
13
cli.py
@@ -3836,6 +3836,8 @@ class HermesCLI:
|
||||
self.console.print(f" Status bar {state}")
|
||||
elif canonical == "verbose":
|
||||
self._toggle_verbose()
|
||||
elif canonical == "yolo":
|
||||
self._toggle_yolo()
|
||||
elif canonical == "reasoning":
|
||||
self._handle_reasoning_command(cmd_original)
|
||||
elif canonical == "compress":
|
||||
@@ -4434,6 +4436,17 @@ class HermesCLI:
|
||||
}
|
||||
_cprint(labels.get(self.tool_progress_mode, ""))
|
||||
|
||||
def _toggle_yolo(self):
|
||||
"""Toggle YOLO mode — skip all dangerous command approval prompts."""
|
||||
import os
|
||||
current = bool(os.environ.get("HERMES_YOLO_MODE"))
|
||||
if current:
|
||||
os.environ.pop("HERMES_YOLO_MODE", None)
|
||||
self.console.print(" ⚠ YOLO mode [bold red]OFF[/] — dangerous commands will require approval.")
|
||||
else:
|
||||
os.environ["HERMES_YOLO_MODE"] = "1"
|
||||
self.console.print(" ⚡ YOLO mode [bold green]ON[/] — all commands auto-approved. Use with caution.")
|
||||
|
||||
def _handle_reasoning_command(self, cmd: str):
|
||||
"""Handle /reasoning — manage effort level and display toggle.
|
||||
|
||||
|
||||
@@ -432,6 +432,13 @@ class GatewayRunner:
|
||||
self._honcho_managers: Dict[str, Any] = {}
|
||||
self._honcho_configs: Dict[str, Any] = {}
|
||||
|
||||
# Rate-limit compression warning messages sent to users.
|
||||
# Keyed by chat_id — value is the timestamp of the last warning sent.
|
||||
# Prevents the warning from firing on every message when a session
|
||||
# remains above the threshold after compression.
|
||||
self._compression_warn_sent: Dict[str, float] = {}
|
||||
self._compression_warn_cooldown: int = 3600 # seconds (1 hour)
|
||||
|
||||
# Ensure tirith security scanner is available (downloads if needed)
|
||||
try:
|
||||
from tools.tirith_security import ensure_installed
|
||||
@@ -1830,6 +1837,9 @@ class GatewayRunner:
|
||||
if canonical == "verbose":
|
||||
return await self._handle_verbose_command(event)
|
||||
|
||||
if canonical == "yolo":
|
||||
return await self._handle_yolo_command(event)
|
||||
|
||||
if canonical == "provider":
|
||||
return await self._handle_provider_command(event)
|
||||
|
||||
@@ -2344,13 +2354,18 @@ class GatewayRunner:
|
||||
pass
|
||||
|
||||
# Still too large after compression — warn user
|
||||
# Rate-limited to once per cooldown period per
|
||||
# chat to avoid spamming on every message.
|
||||
if _new_tokens >= _warn_token_threshold:
|
||||
logger.warning(
|
||||
"Session hygiene: still ~%s tokens after "
|
||||
"compression — suggesting /reset",
|
||||
f"{_new_tokens:,}",
|
||||
)
|
||||
if _hyg_adapter:
|
||||
_now = time.time()
|
||||
_last_warn = self._compression_warn_sent.get(source.chat_id, 0)
|
||||
if _hyg_adapter and _now - _last_warn >= self._compression_warn_cooldown:
|
||||
self._compression_warn_sent[source.chat_id] = _now
|
||||
try:
|
||||
await _hyg_adapter.send(
|
||||
source.chat_id,
|
||||
@@ -2372,7 +2387,10 @@ class GatewayRunner:
|
||||
if _approx_tokens >= _warn_token_threshold:
|
||||
_hyg_adapter = self.adapters.get(source.platform)
|
||||
_hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
|
||||
if _hyg_adapter:
|
||||
_now = time.time()
|
||||
_last_warn = self._compression_warn_sent.get(source.chat_id, 0)
|
||||
if _hyg_adapter and _now - _last_warn >= self._compression_warn_cooldown:
|
||||
self._compression_warn_sent[source.chat_id] = _now
|
||||
try:
|
||||
await _hyg_adapter.send(
|
||||
source.chat_id,
|
||||
@@ -3999,6 +4017,16 @@ class GatewayRunner:
|
||||
else:
|
||||
return f"🧠 ✓ Reasoning effort set to `{effort}` (this session only)"
|
||||
|
||||
async def _handle_yolo_command(self, event: MessageEvent) -> str:
    """Handle /yolo — toggle dangerous command approval bypass.

    Flips the ``HERMES_YOLO_MODE`` environment variable of the running
    process: a non-empty value means the bypass is active.  Enabling stores
    ``"1"``; disabling removes the variable.

    Args:
        event: The incoming message event.  It is not inspected here.

    Returns:
        The status line describing the state after the toggle.
    """
    current = bool(os.environ.get("HERMES_YOLO_MODE"))
    if current:
        # Default argument keeps pop() from raising if the key is already gone.
        os.environ.pop("HERMES_YOLO_MODE", None)
        return "⚠️ YOLO mode **OFF** — dangerous commands will require approval."
    os.environ["HERMES_YOLO_MODE"] = "1"
    return "⚡ YOLO mode **ON** — all commands auto-approved. Use with caution."
|
||||
|
||||
async def _handle_verbose_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /verbose command — cycle tool progress display mode.
|
||||
|
||||
|
||||
@@ -90,6 +90,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("verbose", "Cycle tool progress display: off -> new -> all -> verbose",
|
||||
"Configuration", cli_only=True,
|
||||
gateway_config_gate="display.tool_progress_command"),
|
||||
CommandDef("yolo", "Toggle YOLO mode (skip all dangerous command approvals)",
|
||||
"Configuration"),
|
||||
CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
|
||||
args_hint="[level|show|hide]",
|
||||
subcommands=("none", "low", "minimal", "medium", "high", "xhigh", "show", "hide", "on", "off")),
|
||||
|
||||
17
run_agent.py
17
run_agent.py
@@ -5204,11 +5204,8 @@ class AIAgent:
|
||||
except Exception as e:
|
||||
logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e)
|
||||
|
||||
# Reset context pressure warning and token estimate — usage drops
|
||||
# after compaction. Without this, the stale last_prompt_tokens from
|
||||
# the previous API call causes the pressure calculation to stay at
|
||||
# >1000% and spam warnings / re-trigger compression in a loop.
|
||||
self._context_pressure_warned = False
|
||||
# Update token estimate after compaction so pressure calculations
|
||||
# use the post-compression count, not the stale pre-compression one.
|
||||
_compressed_est = (
|
||||
estimate_tokens_rough(new_system_prompt)
|
||||
+ estimate_messages_tokens_rough(compressed)
|
||||
@@ -5216,6 +5213,16 @@ class AIAgent:
|
||||
self.context_compressor.last_prompt_tokens = _compressed_est
|
||||
self.context_compressor.last_completion_tokens = 0
|
||||
|
||||
# Only reset the pressure warning if compression actually brought
|
||||
# us below the warning level (85% of threshold). When compression
|
||||
# can't reduce enough (e.g. threshold is very low, or system prompt
|
||||
# alone exceeds the warning level), keep the flag set to prevent
|
||||
# spamming the user with repeated warnings every loop iteration.
|
||||
if self.context_compressor.threshold_tokens > 0:
|
||||
_post_progress = _compressed_est / self.context_compressor.threshold_tokens
|
||||
if _post_progress < 0.85:
|
||||
self._context_pressure_warned = False
|
||||
|
||||
return compressed, new_system_prompt
|
||||
|
||||
def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
|
||||
|
||||
@@ -212,6 +212,49 @@ class TestSessionHygieneWarnThreshold:
|
||||
assert post_compress_tokens < warn_threshold
|
||||
|
||||
|
||||
class TestCompressionWarnRateLimit:
    """Compression warning messages must be rate-limited per chat_id.

    The tests check the cooldown arithmetic against the runner's
    ``_compression_warn_sent`` and ``_compression_warn_cooldown`` attributes;
    they do not drive the gateway's message handler.
    """

    def _make_runner(self):
        """Return a bare ``GatewayRunner`` carrying only the rate-limit state."""
        from unittest.mock import patch

        with patch("gateway.run.load_gateway_config"), \
             patch("gateway.run.SessionStore"), \
             patch("gateway.run.DeliveryRouter"):
            from gateway.run import GatewayRunner

            # __new__ bypasses __init__, so the two attributes under test
            # have to be attached by hand.
            runner = GatewayRunner.__new__(GatewayRunner)
            runner._compression_warn_sent = {}
            runner._compression_warn_cooldown = 3600
            return runner

    @staticmethod
    def _warn_due(runner, chat_id, now):
        """Return True when a warning for *chat_id* may be sent at time *now*.

        A chat with no recorded warning is treated as last warned at time 0.
        """
        last = runner._compression_warn_sent.get(chat_id, 0)
        return now - last >= runner._compression_warn_cooldown

    def test_first_warn_is_sent(self):
        runner = self._make_runner()
        assert self._warn_due(runner, "chat:1", 1_000_000.0)

    def test_second_warn_suppressed_within_cooldown(self):
        runner = self._make_runner()
        now = 1_000_000.0
        runner._compression_warn_sent["chat:1"] = now - 60  # 1 minute ago
        assert not self._warn_due(runner, "chat:1", now)

    def test_warn_allowed_after_cooldown(self):
        runner = self._make_runner()
        now = 1_000_000.0
        runner._compression_warn_sent["chat:1"] = now - 3601  # just past cooldown
        assert self._warn_due(runner, "chat:1", now)

    def test_warn_allowed_exactly_at_cooldown(self):
        """The comparison is inclusive: elapsed == cooldown permits a warning."""
        runner = self._make_runner()
        now = 1_000_000.0
        runner._compression_warn_sent["chat:1"] = now - runner._compression_warn_cooldown
        assert self._warn_due(runner, "chat:1", now)

    def test_rate_limit_is_per_chat(self):
        """Rate-limiting one chat must not suppress warnings for another."""
        runner = self._make_runner()
        now = 1_000_000.0
        runner._compression_warn_sent["chat:1"] = now - 60  # suppressed
        assert not self._warn_due(runner, "chat:1", now)
        assert self._warn_due(runner, "chat:2", now)
|
||||
|
||||
|
||||
class TestEstimatedTokenThreshold:
|
||||
"""Verify that hygiene thresholds are always below the model's context
|
||||
limit — for both actual and estimated token counts.
|
||||
|
||||
Reference in New Issue
Block a user