mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-10 04:08:28 +08:00
Compare commits
1 Commits
feat/plugi
...
salvage/40
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bfa35a93fb |
152
cli.py
152
cli.py
@@ -974,7 +974,7 @@ def _run_cleanup():
|
||||
try:
|
||||
from tools.mcp_tool import shutdown_mcp_servers
|
||||
shutdown_mcp_servers()
|
||||
except BaseException:
|
||||
except Exception:
|
||||
pass
|
||||
# Close cached auxiliary LLM clients (sync + async) so that
|
||||
# AsyncHttpxClientWrapper.__del__ doesn't fire on a closed event loop
|
||||
@@ -3144,8 +3144,9 @@ class HermesCLI:
|
||||
|
||||
# streaming: stream tokens to the terminal as they arrive (display.streaming in config.yaml)
|
||||
self.streaming_enabled = CLI_CONFIG["display"].get("streaming", False)
|
||||
# show_timestamps: prefix user and assistant labels with [HH:MM]
|
||||
# show_timestamps: prefix user and assistant labels with timestamps
|
||||
self.show_timestamps = CLI_CONFIG["display"].get("timestamps", False)
|
||||
self.timestamp_format = CLI_CONFIG["display"].get("timestamp_format", "%H:%M")
|
||||
self.final_response_markdown = str(
|
||||
CLI_CONFIG["display"].get("final_response_markdown", "strip")
|
||||
).strip().lower() or "strip"
|
||||
@@ -3194,18 +3195,6 @@ class HermesCLI:
|
||||
_config_model = (_model_config.get("default") or _model_config.get("model") or "") if isinstance(_model_config, dict) else (_model_config or "")
|
||||
_DEFAULT_CONFIG_MODEL = ""
|
||||
self.model = model or _config_model or _DEFAULT_CONFIG_MODEL
|
||||
# Read max_tokens from config (env var override: HERMES_MAX_TOKENS)
|
||||
_env_mt = os.environ.get("HERMES_MAX_TOKENS")
|
||||
if _env_mt:
|
||||
try:
|
||||
self.max_tokens = int(_env_mt)
|
||||
except (ValueError, TypeError):
|
||||
self.max_tokens = None
|
||||
elif isinstance(_model_config, dict):
|
||||
_mt = _model_config.get("max_tokens")
|
||||
self.max_tokens = _mt if isinstance(_mt, int) else None
|
||||
else:
|
||||
self.max_tokens = None
|
||||
# Auto-detect model from local server if still on default
|
||||
if self.model == _DEFAULT_CONFIG_MODEL:
|
||||
_base_url = (_model_config.get("base_url") or "") if isinstance(_model_config, dict) else ""
|
||||
@@ -4234,52 +4223,6 @@ class HermesCLI:
|
||||
self._tool_start_time = 0.0 # clear tool timer when switching to thinking
|
||||
self._invalidate()
|
||||
|
||||
def _on_notice(self, notice) -> None:
|
||||
"""Queue an out-of-band AgentNotice for rendering at the next clean boundary.
|
||||
|
||||
Notices fire from inside the agent turn (cold-start seed during _init_agent,
|
||||
per-turn _capture_credits after the API call) — printing immediately races the
|
||||
streaming response and the line gets buried behind the prompt (see _cprint's
|
||||
bg-thread caveat). So we QUEUE here and flush in _flush_credit_notices(), called
|
||||
right after run_conversation returns. Fail-soft: never break the turn.
|
||||
"""
|
||||
try:
|
||||
text = getattr(notice, "text", "") or ""
|
||||
if not text:
|
||||
return
|
||||
level = getattr(notice, "level", "info") or "info"
|
||||
if not hasattr(self, "_pending_credit_notices"):
|
||||
self._pending_credit_notices = []
|
||||
self._pending_credit_notices.append((level, text))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _flush_credit_notices(self) -> None:
|
||||
"""Print any queued credit notices as level-colored lines. Called at turn end
|
||||
(after run_conversation) where _cprint paints cleanly above the prompt."""
|
||||
try:
|
||||
pending = getattr(self, "_pending_credit_notices", None)
|
||||
if not pending:
|
||||
return
|
||||
self._pending_credit_notices = []
|
||||
for level, text in pending:
|
||||
color = {
|
||||
"error": "\033[31m",
|
||||
"warn": "\033[33m",
|
||||
"success": "\033[32m",
|
||||
"info": _DIM,
|
||||
}.get(level, _DIM)
|
||||
_cprint(f" {color}{text}{_RST}")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _on_notice_clear(self, key: str) -> None:
|
||||
"""Notice cleared. The REPL prints lines (no persistent slot to wipe), so
|
||||
this drops any still-queued notice with that key is not tracked by key here;
|
||||
it's a no-op for rendering — kept so the agent's clear callback is bound
|
||||
symmetrically with the show callback (and so future REPL UIs can hook it)."""
|
||||
return
|
||||
|
||||
# ── Streaming display ────────────────────────────────────────────────
|
||||
|
||||
def _current_reasoning_callback(self):
|
||||
@@ -4377,7 +4320,7 @@ class HermesCLI:
|
||||
def _format_submitted_user_message_preview(self, user_input: str) -> str:
|
||||
"""Format the submitted user-message scrollback preview."""
|
||||
ts_suffix = (
|
||||
f" [dim]{datetime.now().strftime('%H:%M')}[/]"
|
||||
f" [dim]{datetime.now().strftime(getattr(self, 'timestamp_format', '%H:%M'))}[/]"
|
||||
if getattr(self, "show_timestamps", False) else ""
|
||||
)
|
||||
lines = user_input.split("\n")
|
||||
@@ -4672,7 +4615,7 @@ class HermesCLI:
|
||||
except (ValueError, IndexError):
|
||||
self._stream_text_ansi = ""
|
||||
if self.show_timestamps:
|
||||
label = f"{label} {datetime.now().strftime('%H:%M')}"
|
||||
label = f"{label} {datetime.now().strftime(getattr(self, 'timestamp_format', '%H:%M'))}"
|
||||
w = self._scrollback_box_width()
|
||||
fill = w - 2 - HermesCLI._status_bar_display_width(label)
|
||||
_cprint(f"\n{_ACCENT}╭─{label}{'─' * max(fill - 1, 0)}╮{_RST}")
|
||||
@@ -5155,9 +5098,9 @@ class HermesCLI:
|
||||
resolved_id = self.session_id
|
||||
if resolved_id and resolved_id != self.session_id:
|
||||
ChatConsole().print(
|
||||
f"[dim]Session {_escape(self.session_id)} was compressed into "
|
||||
f"[{_DIM}]Session {_escape(self.session_id)} was compressed into "
|
||||
f"{_escape(resolved_id)}; resuming the descendant with your "
|
||||
f"transcript.[/dim]"
|
||||
f"transcript.[/]"
|
||||
)
|
||||
self.session_id = resolved_id
|
||||
resolved_meta = self._session_db.get_session(self.session_id)
|
||||
@@ -5226,7 +5169,6 @@ class HermesCLI:
|
||||
acp_command=runtime.get("command"),
|
||||
acp_args=runtime.get("args"),
|
||||
credential_pool=runtime.get("credential_pool"),
|
||||
max_tokens=self.max_tokens,
|
||||
max_iterations=self.max_turns,
|
||||
enabled_toolsets=self.enabled_toolsets,
|
||||
disabled_toolsets=self.disabled_toolsets,
|
||||
@@ -5264,8 +5206,6 @@ class HermesCLI:
|
||||
tool_complete_callback=self._on_tool_complete if self._inline_diffs_enabled else None,
|
||||
stream_delta_callback=self._stream_delta if self.streaming_enabled else None,
|
||||
tool_gen_callback=self._on_tool_gen_start if self.streaming_enabled else None,
|
||||
notice_callback=self._on_notice,
|
||||
notice_clear_callback=self._on_notice_clear,
|
||||
)
|
||||
# Store reference for atexit memory provider shutdown
|
||||
global _active_agent_ref
|
||||
@@ -5273,16 +5213,6 @@ class HermesCLI:
|
||||
# Route agent status output through prompt_toolkit so ANSI escape
|
||||
# sequences aren't garbled by patch_stdout's StdoutProxy (#2262).
|
||||
self.agent._print_fn = _cprint
|
||||
# Hydrate credits notices at session OPEN (parity with the TUI), so a
|
||||
# depletion / usage-band warning shows before the first message. The
|
||||
# notice_callback is bound above → _on_notice renders the line. Idempotent
|
||||
# + fail-open inside the helper; harmless for non-Nous providers.
|
||||
try:
|
||||
from agent.credits_tracker import seed_credits_at_session_start
|
||||
|
||||
seed_credits_at_session_start(self.agent)
|
||||
except Exception:
|
||||
pass
|
||||
self._active_agent_route_signature = (
|
||||
effective_model,
|
||||
runtime.get("provider"),
|
||||
@@ -5449,7 +5379,7 @@ class HermesCLI:
|
||||
if quiet:
|
||||
print(msg, file=sys.stderr)
|
||||
else:
|
||||
self._console_print(f"[dim]{_escape(msg)}[/dim]")
|
||||
self._console_print(f"[{_DIM}]{_escape(msg)}[/]")
|
||||
return
|
||||
|
||||
try:
|
||||
@@ -5459,7 +5389,7 @@ class HermesCLI:
|
||||
if quiet:
|
||||
print(msg, file=sys.stderr)
|
||||
else:
|
||||
self._console_print(f"[dim]{_escape(msg)}[/dim]")
|
||||
self._console_print(f"[{_DIM}]{_escape(msg)}[/]")
|
||||
return
|
||||
|
||||
# Retarget the terminal/code-exec tools to match the process cwd.
|
||||
@@ -5469,7 +5399,7 @@ class HermesCLI:
|
||||
if quiet:
|
||||
print(msg, file=sys.stderr)
|
||||
else:
|
||||
self._console_print(f"[dim]{_escape(msg)}[/dim]")
|
||||
self._console_print(f"[{_DIM}]{_escape(msg)}[/]")
|
||||
|
||||
def _preload_resumed_session(self) -> bool:
|
||||
"""Load a resumed session's history from the DB early (before first chat).
|
||||
@@ -9073,10 +9003,6 @@ class HermesCLI:
|
||||
elif canonical == "update":
|
||||
if self._handle_update_command():
|
||||
return False
|
||||
elif canonical == "version":
|
||||
from hermes_cli.main import _print_version_info
|
||||
|
||||
_print_version_info(check_updates=True)
|
||||
elif canonical == "paste":
|
||||
self._handle_paste_command()
|
||||
elif canonical == "image":
|
||||
@@ -9359,7 +9285,6 @@ class HermesCLI:
|
||||
api_mode=turn_route["runtime"].get("api_mode"),
|
||||
acp_command=turn_route["runtime"].get("command"),
|
||||
acp_args=turn_route["runtime"].get("args"),
|
||||
max_tokens=turn_route["runtime"].get("max_tokens"),
|
||||
max_iterations=self.max_turns,
|
||||
enabled_toolsets=self.enabled_toolsets,
|
||||
quiet_mode=True,
|
||||
@@ -10593,24 +10518,16 @@ class HermesCLI:
|
||||
return True
|
||||
|
||||
def _show_usage(self):
|
||||
"""Rate limits + session token usage (when a live agent exists) + Nous credits.
|
||||
|
||||
The Nous credits block is agent-independent (a portal fetch), so it runs even
|
||||
with no live agent — important for the TUI, where /usage runs in a slash-worker
|
||||
subprocess that resumes the session WITHOUT building an agent (self.agent is None),
|
||||
which would otherwise early-return before any credits showed.
|
||||
"""
|
||||
"""Show rate limits (if available) and session token usage."""
|
||||
if not self.agent:
|
||||
if not self._print_nous_credits_block():
|
||||
print("(._.) No active agent -- send a message first.")
|
||||
print("(._.) No active agent -- send a message first.")
|
||||
return
|
||||
|
||||
agent = self.agent
|
||||
calls = agent.session_api_calls
|
||||
|
||||
if calls == 0:
|
||||
if not self._print_nous_credits_block():
|
||||
print("(._.) No API calls made yet in this session.")
|
||||
print("(._.) No API calls made yet in this session.")
|
||||
return
|
||||
|
||||
# ── Rate limits (shown first when available) ────────────────
|
||||
@@ -10704,10 +10621,6 @@ class HermesCLI:
|
||||
for line in account_lines:
|
||||
print(line)
|
||||
|
||||
# Nous credits magnitudes + monthly-grant gauge (agent-independent — also
|
||||
# runs at the no-agent / no-calls early-returns above). See the helper.
|
||||
self._print_nous_credits_block()
|
||||
|
||||
if self.verbose:
|
||||
logging.getLogger().setLevel(logging.DEBUG)
|
||||
for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'):
|
||||
@@ -10722,28 +10635,6 @@ class HermesCLI:
|
||||
# Console quietness is enforced by hermes_logging not
|
||||
# installing a console StreamHandler in non-verbose mode.
|
||||
|
||||
def _print_nous_credits_block(self) -> bool:
|
||||
"""Print the Nous credits magnitudes + monthly-grant gauge when a Nous account
|
||||
is logged in. Returns True if it printed anything.
|
||||
|
||||
Delegates to the shared ``agent.account_usage.nous_credits_lines`` helper —
|
||||
the single source for the /usage credits block across CLI, gateway, and TUI.
|
||||
It's agent-independent (a portal fetch gated on "a Nous account is logged in",
|
||||
NOT the inference-provider string), so /usage shows the block even in the TUI
|
||||
slash-worker subprocess that resumes WITHOUT a live agent. Fail-open and
|
||||
wall-clock-bounded inside the helper; also honors HERMES_DEV_CREDITS_FIXTURE
|
||||
for offline testing — same behavior as every other surface.
|
||||
"""
|
||||
from agent.account_usage import nous_credits_lines
|
||||
|
||||
lines = nous_credits_lines()
|
||||
if not lines:
|
||||
return False
|
||||
print()
|
||||
for line in lines:
|
||||
print(f" {line}")
|
||||
return True
|
||||
|
||||
def _show_insights(self, command: str = "/insights"):
|
||||
"""Show usage insights and analytics from session history."""
|
||||
# Parse optional --days flag
|
||||
@@ -12404,7 +12295,7 @@ class HermesCLI:
|
||||
w = self._scrollback_box_width(getattr(self.console, "width", 80))
|
||||
label = " ⚕ Hermes "
|
||||
if self.show_timestamps:
|
||||
label = f"{label}{datetime.now().strftime('%H:%M')} "
|
||||
label = f"{label}{datetime.now().strftime(getattr(self, 'timestamp_format', '%H:%M'))} "
|
||||
fill = w - 2 - HermesCLI._status_bar_display_width(label)
|
||||
_cprint(f"\n{_ACCENT}╭─{label}{'─' * max(fill - 1, 0)}╮{_RST}")
|
||||
_cprint(f"{_STREAM_PAD}{sentence.rstrip()}")
|
||||
@@ -12498,11 +12389,6 @@ class HermesCLI:
|
||||
"error": _summary,
|
||||
}
|
||||
finally:
|
||||
# Surface any credit notices queued during the turn (cold-start
|
||||
# seed / per-turn capture) now that the response is done — printing
|
||||
# at this boundary paints cleanly above the prompt instead of being
|
||||
# buried behind the streaming output.
|
||||
self._flush_credit_notices()
|
||||
# Clear thread-local callbacks so a reused thread doesn't
|
||||
# hold stale references to a disposed CLI instance.
|
||||
try:
|
||||
@@ -13209,16 +13095,6 @@ class HermesCLI:
|
||||
_welcome_color = "#FFF8DC"
|
||||
self._console_print(f"[{_welcome_color}]{_welcome_text}[/]")
|
||||
|
||||
# Warm the /model picker's provider-models cache off-thread during this
|
||||
# idle window (banner shown, user about to type). The no-args picker
|
||||
# otherwise blocks ~1-2s on serial /v1/models fetches the first time
|
||||
# it's opened in a session. Fire-and-forget, guarded once-per-process.
|
||||
try:
|
||||
from hermes_cli.model_switch import prewarm_picker_cache_async
|
||||
prewarm_picker_cache_async()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Redaction opt-out warning (#17691): ON by default, loud when off.
|
||||
# The redactor snapshots its state at import time so any toggle now
|
||||
# won't affect the running process — we just want the operator to
|
||||
|
||||
@@ -1366,7 +1366,8 @@ DEFAULT_CONFIG = {
|
||||
"bell_on_complete": False,
|
||||
"show_reasoning": False,
|
||||
"streaming": False,
|
||||
"timestamps": False, # Show [HH:MM] on user and assistant labels
|
||||
"timestamps": False, # Show timestamp on user and assistant labels
|
||||
"timestamp_format": "%H:%M", # strftime format for timestamps (e.g. "%b-%d %H:%M")
|
||||
"final_response_markdown": "strip", # render | strip | raw
|
||||
# Preserve recent classic CLI output across Ctrl+L, /redraw, and
|
||||
# terminal resize full-screen clears. Disable if a terminal emulator
|
||||
@@ -1677,9 +1678,9 @@ DEFAULT_CONFIG = {
|
||||
# "low", "minimal", "none" (empty = inherit parent's level)
|
||||
"max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling
|
||||
# Orchestrator role controls (see tools/delegate_tool.py:_get_max_spawn_depth
|
||||
# and _get_orchestrator_enabled). Floored at 1, no upper ceiling —
|
||||
# raise deliberately, each level multiplies API cost.
|
||||
"max_spawn_depth": 1, # depth (1 = flat [default], 2 = orchestrator→leaf, 3+ = deeper)
|
||||
# and _get_orchestrator_enabled). Values are clamped to [1, 3] with a
|
||||
# warning log if out of range.
|
||||
"max_spawn_depth": 1, # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level)
|
||||
"orchestrator_enabled": True, # kill switch for role="orchestrator"
|
||||
# When a subagent hits a dangerous-command approval prompt, the parent's
|
||||
# prompt_toolkit TUI owns stdin — a thread-local input() call from the
|
||||
@@ -1841,28 +1842,6 @@ DEFAULT_CONFIG = {
|
||||
# real memory cost. Default 32 MiB matches the historical hardcoded
|
||||
# cap. Set to 0 for no cap. Env override: DISCORD_MAX_ATTACHMENT_BYTES.
|
||||
"max_attachment_bytes": 33554432,
|
||||
# Voice-channel audio effects (the continuous mixer). OFF by default.
|
||||
# When enabled, the bot installs a software mixer on the outgoing voice
|
||||
# stream so a low ambient "thinking" bed, verbal acknowledgements, and
|
||||
# TTS replies can OVERLAP (ducking the ambient under speech) instead of
|
||||
# stop-and-swap — the Grok-voice-mode feel. discord.py ships no mixer;
|
||||
# this is implemented in plugins/platforms/discord/voice_mixer.py.
|
||||
"voice_fx": {
|
||||
"enabled": False, # master switch for the mixer subsystem
|
||||
"ambient_enabled": True, # play the idle "thinking" bed while tools run
|
||||
"ambient_path": "", # custom loop audio file; "" = synthesised pad
|
||||
"ambient_gain": 0.18, # idle bed loudness, 0.0–1.0
|
||||
"duck_gain": 0.06, # ambient loudness while speech plays
|
||||
"speech_gain": 1.0, # TTS / ack loudness, 0.0–1.0
|
||||
"ack_enabled": True, # speak a short phrase before the first tool call
|
||||
"ack_phrases": [ # picked at random; set [] to disable phrases
|
||||
"Let me look into that.",
|
||||
"One moment.",
|
||||
"Checking on that now.",
|
||||
"Give me a sec.",
|
||||
"On it.",
|
||||
],
|
||||
},
|
||||
},
|
||||
|
||||
# WhatsApp platform settings (gateway mode)
|
||||
@@ -2275,22 +2254,6 @@ DEFAULT_CONFIG = {
|
||||
# disable backups entirely, set ``pre_update_backup: false`` above
|
||||
# rather than ``backup_keep: 0``.
|
||||
"backup_keep": 5,
|
||||
# What `hermes update` does with uncommitted local changes to the
|
||||
# source tree when it runs NON-interactively — i.e. triggered from
|
||||
# the desktop/chat app or the gateway, where there's no TTY to answer
|
||||
# a restore prompt. Interactive (terminal) updates are unaffected:
|
||||
# they always stash the changes and ask whether to restore, exactly
|
||||
# as they always have.
|
||||
# "stash" — auto-stash the changes, pull, then auto-restore them
|
||||
# on top of the updated code (the safe default; nothing
|
||||
# is ever lost — conflicts are preserved in a git stash).
|
||||
# "discard" — auto-stash the changes and throw the stash away after
|
||||
# the pull. Use this only if you never intend to keep
|
||||
# local edits to the source tree on this machine.
|
||||
# Stash-and-drop (not `reset --hard` + `clean -fd`) so
|
||||
# ignored paths — node_modules, venv, build outputs —
|
||||
# are never touched.
|
||||
"non_interactive_local_changes": "stash",
|
||||
},
|
||||
|
||||
# Language Server Protocol — semantic diagnostics from real
|
||||
@@ -2420,7 +2383,7 @@ DEFAULT_CONFIG = {
|
||||
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 27,
|
||||
"_config_version": 26,
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
@@ -3975,7 +3938,7 @@ _KNOWN_ROOT_KEYS = {
|
||||
"fallback_providers", "credential_pool_strategies", "toolsets",
|
||||
"agent", "terminal", "display", "compression", "delegation",
|
||||
"auxiliary", "custom_providers", "context", "memory", "gateway",
|
||||
"sessions", "streaming", "updates",
|
||||
"sessions", "streaming",
|
||||
}
|
||||
|
||||
# Valid fields inside a custom_providers list entry
|
||||
|
||||
Reference in New Issue
Block a user