feat(config): add display.timestamp_format and honor it in CLI timestamps

Salvaged from #40303; re-verified on main, tightened, tested. Co-authored-by: pdmartins <pdmartins@users.noreply.github.com>
2026-06-10 04:08:28 +08:00 · 2026-06-06 08:49:52 -07:00
2 changed files with 21 additions and 182 deletions
--- a/cli.py
+++ b/cli.py
@@ -974,7 +974,7 @@ def _run_cleanup():
    try:
        from tools.mcp_tool import shutdown_mcp_servers
        shutdown_mcp_servers()
-    except BaseException:
+    except Exception:
        pass
    # Close cached auxiliary LLM clients (sync + async) so that
    # AsyncHttpxClientWrapper.__del__ doesn't fire on a closed event loop
@@ -3144,8 +3144,9 @@ class HermesCLI:
        
        # streaming: stream tokens to the terminal as they arrive (display.streaming in config.yaml)
        self.streaming_enabled = CLI_CONFIG["display"].get("streaming", False)
-        # show_timestamps: prefix user and assistant labels with [HH:MM]
+        # show_timestamps: prefix user and assistant labels with timestamps
        self.show_timestamps = CLI_CONFIG["display"].get("timestamps", False)
+        self.timestamp_format = CLI_CONFIG["display"].get("timestamp_format", "%H:%M")
        self.final_response_markdown = str(
            CLI_CONFIG["display"].get("final_response_markdown", "strip")
        ).strip().lower() or "strip"
@@ -3194,18 +3195,6 @@ class HermesCLI:
        _config_model = (_model_config.get("default") or _model_config.get("model") or "") if isinstance(_model_config, dict) else (_model_config or "")
        _DEFAULT_CONFIG_MODEL = ""
        self.model = model or _config_model or _DEFAULT_CONFIG_MODEL
-        # Read max_tokens from config (env var override: HERMES_MAX_TOKENS)
-        _env_mt = os.environ.get("HERMES_MAX_TOKENS")
-        if _env_mt:
-            try:
-                self.max_tokens = int(_env_mt)
-            except (ValueError, TypeError):
-                self.max_tokens = None
-        elif isinstance(_model_config, dict):
-            _mt = _model_config.get("max_tokens")
-            self.max_tokens = _mt if isinstance(_mt, int) else None
-        else:
-            self.max_tokens = None
        # Auto-detect model from local server if still on default
        if self.model == _DEFAULT_CONFIG_MODEL:
            _base_url = (_model_config.get("base_url") or "") if isinstance(_model_config, dict) else ""
@@ -4234,52 +4223,6 @@ class HermesCLI:
        self._tool_start_time = 0.0  # clear tool timer when switching to thinking
        self._invalidate()

-    def _on_notice(self, notice) -> None:
-        """Queue an out-of-band AgentNotice for rendering at the next clean boundary.
-
-        Notices fire from inside the agent turn (cold-start seed during _init_agent,
-        per-turn _capture_credits after the API call) — printing immediately races the
-        streaming response and the line gets buried behind the prompt (see _cprint's
-        bg-thread caveat). So we QUEUE here and flush in _flush_credit_notices(), called
-        right after run_conversation returns. Fail-soft: never break the turn.
-        """
-        try:
-            text = getattr(notice, "text", "") or ""
-            if not text:
-                return
-            level = getattr(notice, "level", "info") or "info"
-            if not hasattr(self, "_pending_credit_notices"):
-                self._pending_credit_notices = []
-            self._pending_credit_notices.append((level, text))
-        except Exception:
-            pass
-
-    def _flush_credit_notices(self) -> None:
-        """Print any queued credit notices as level-colored lines. Called at turn end
-        (after run_conversation) where _cprint paints cleanly above the prompt."""
-        try:
-            pending = getattr(self, "_pending_credit_notices", None)
-            if not pending:
-                return
-            self._pending_credit_notices = []
-            for level, text in pending:
-                color = {
-                    "error": "\033[31m",
-                    "warn": "\033[33m",
-                    "success": "\033[32m",
-                    "info": _DIM,
-                }.get(level, _DIM)
-                _cprint(f"  {color}{text}{_RST}")
-        except Exception:
-            pass
-
-    def _on_notice_clear(self, key: str) -> None:
-        """Notice cleared. The REPL prints lines (no persistent slot to wipe), so
-        this drops any still-queued notice with that key is not tracked by key here;
-        it's a no-op for rendering — kept so the agent's clear callback is bound
-        symmetrically with the show callback (and so future REPL UIs can hook it)."""
-        return
-
    # ── Streaming display ────────────────────────────────────────────────

    def _current_reasoning_callback(self):
@@ -4377,7 +4320,7 @@ class HermesCLI:
    def _format_submitted_user_message_preview(self, user_input: str) -> str:
        """Format the submitted user-message scrollback preview."""
        ts_suffix = (
-            f" [dim]{datetime.now().strftime('%H:%M')}[/]"
+            f" [dim]{datetime.now().strftime(getattr(self, 'timestamp_format', '%H:%M'))}[/]"
            if getattr(self, "show_timestamps", False) else ""
        )
        lines = user_input.split("\n")
@@ -4672,7 +4615,7 @@ class HermesCLI:
            except (ValueError, IndexError):
                self._stream_text_ansi = ""
            if self.show_timestamps:
-                label = f"{label} {datetime.now().strftime('%H:%M')}"
+                label = f"{label} {datetime.now().strftime(getattr(self, 'timestamp_format', '%H:%M'))}"
            w = self._scrollback_box_width()
            fill = w - 2 - HermesCLI._status_bar_display_width(label)
            _cprint(f"\n{_ACCENT}╭─{label}{'─' * max(fill - 1, 0)}╮{_RST}")
@@ -5155,9 +5098,9 @@ class HermesCLI:
                resolved_id = self.session_id
            if resolved_id and resolved_id != self.session_id:
                ChatConsole().print(
-                    f"[dim]Session {_escape(self.session_id)} was compressed into "
+                    f"[{_DIM}]Session {_escape(self.session_id)} was compressed into "
                    f"{_escape(resolved_id)}; resuming the descendant with your "
-                    f"transcript.[/dim]"
+                    f"transcript.[/]"
                )
                self.session_id = resolved_id
                resolved_meta = self._session_db.get_session(self.session_id)
@@ -5226,7 +5169,6 @@ class HermesCLI:
                acp_command=runtime.get("command"),
                acp_args=runtime.get("args"),
                credential_pool=runtime.get("credential_pool"),
-                max_tokens=self.max_tokens,
                max_iterations=self.max_turns,
                enabled_toolsets=self.enabled_toolsets,
                disabled_toolsets=self.disabled_toolsets,
@@ -5264,8 +5206,6 @@ class HermesCLI:
                tool_complete_callback=self._on_tool_complete if self._inline_diffs_enabled else None,
                stream_delta_callback=self._stream_delta if self.streaming_enabled else None,
                tool_gen_callback=self._on_tool_gen_start if self.streaming_enabled else None,
-                notice_callback=self._on_notice,
-                notice_clear_callback=self._on_notice_clear,
            )
            # Store reference for atexit memory provider shutdown
            global _active_agent_ref
@@ -5273,16 +5213,6 @@ class HermesCLI:
            # Route agent status output through prompt_toolkit so ANSI escape
            # sequences aren't garbled by patch_stdout's StdoutProxy (#2262).
            self.agent._print_fn = _cprint
-            # Hydrate credits notices at session OPEN (parity with the TUI), so a
-            # depletion / usage-band warning shows before the first message. The
-            # notice_callback is bound above → _on_notice renders the line. Idempotent
-            # + fail-open inside the helper; harmless for non-Nous providers.
-            try:
-                from agent.credits_tracker import seed_credits_at_session_start
-
-                seed_credits_at_session_start(self.agent)
-            except Exception:
-                pass
            self._active_agent_route_signature = (
                effective_model,
                runtime.get("provider"),
@@ -5449,7 +5379,7 @@ class HermesCLI:
            if quiet:
                print(msg, file=sys.stderr)
            else:
-                self._console_print(f"[dim]{_escape(msg)}[/dim]")
+                self._console_print(f"[{_DIM}]{_escape(msg)}[/]")
            return

        try:
@@ -5459,7 +5389,7 @@ class HermesCLI:
            if quiet:
                print(msg, file=sys.stderr)
            else:
-                self._console_print(f"[dim]{_escape(msg)}[/dim]")
+                self._console_print(f"[{_DIM}]{_escape(msg)}[/]")
            return

        # Retarget the terminal/code-exec tools to match the process cwd.
@@ -5469,7 +5399,7 @@ class HermesCLI:
        if quiet:
            print(msg, file=sys.stderr)
        else:
-            self._console_print(f"[dim]{_escape(msg)}[/dim]")
+            self._console_print(f"[{_DIM}]{_escape(msg)}[/]")

    def _preload_resumed_session(self) -> bool:
        """Load a resumed session's history from the DB early (before first chat).
@@ -9073,10 +9003,6 @@ class HermesCLI:
        elif canonical == "update":
            if self._handle_update_command():
                return False
-        elif canonical == "version":
-            from hermes_cli.main import _print_version_info
-
-            _print_version_info(check_updates=True)
        elif canonical == "paste":
            self._handle_paste_command()
        elif canonical == "image":
@@ -9359,7 +9285,6 @@ class HermesCLI:
                    api_mode=turn_route["runtime"].get("api_mode"),
                    acp_command=turn_route["runtime"].get("command"),
                    acp_args=turn_route["runtime"].get("args"),
-                    max_tokens=turn_route["runtime"].get("max_tokens"),
                    max_iterations=self.max_turns,
                    enabled_toolsets=self.enabled_toolsets,
                    quiet_mode=True,
@@ -10593,24 +10518,16 @@ class HermesCLI:
        return True

    def _show_usage(self):
-        """Rate limits + session token usage (when a live agent exists) + Nous credits.
-
-        The Nous credits block is agent-independent (a portal fetch), so it runs even
-        with no live agent — important for the TUI, where /usage runs in a slash-worker
-        subprocess that resumes the session WITHOUT building an agent (self.agent is None),
-        which would otherwise early-return before any credits showed.
-        """
+        """Show rate limits (if available) and session token usage."""
        if not self.agent:
-            if not self._print_nous_credits_block():
-                print("(._.) No active agent -- send a message first.")
+            print("(._.) No active agent -- send a message first.")
            return

        agent = self.agent
        calls = agent.session_api_calls

        if calls == 0:
-            if not self._print_nous_credits_block():
-                print("(._.) No API calls made yet in this session.")
+            print("(._.) No API calls made yet in this session.")
            return

        # ── Rate limits (shown first when available) ────────────────
@@ -10704,10 +10621,6 @@ class HermesCLI:
            for line in account_lines:
                print(line)

-        # Nous credits magnitudes + monthly-grant gauge (agent-independent — also
-        # runs at the no-agent / no-calls early-returns above). See the helper.
-        self._print_nous_credits_block()
-
        if self.verbose:
            logging.getLogger().setLevel(logging.DEBUG)
            for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'):
@@ -10722,28 +10635,6 @@ class HermesCLI:
            # Console quietness is enforced by hermes_logging not
            # installing a console StreamHandler in non-verbose mode.

-    def _print_nous_credits_block(self) -> bool:
-        """Print the Nous credits magnitudes + monthly-grant gauge when a Nous account
-        is logged in. Returns True if it printed anything.
-
-        Delegates to the shared ``agent.account_usage.nous_credits_lines`` helper —
-        the single source for the /usage credits block across CLI, gateway, and TUI.
-        It's agent-independent (a portal fetch gated on "a Nous account is logged in",
-        NOT the inference-provider string), so /usage shows the block even in the TUI
-        slash-worker subprocess that resumes WITHOUT a live agent. Fail-open and
-        wall-clock-bounded inside the helper; also honors HERMES_DEV_CREDITS_FIXTURE
-        for offline testing — same behavior as every other surface.
-        """
-        from agent.account_usage import nous_credits_lines
-
-        lines = nous_credits_lines()
-        if not lines:
-            return False
-        print()
-        for line in lines:
-            print(f"  {line}")
-        return True
-
    def _show_insights(self, command: str = "/insights"):
        """Show usage insights and analytics from session history."""
        # Parse optional --days flag
@@ -12404,7 +12295,7 @@ class HermesCLI:
                        w = self._scrollback_box_width(getattr(self.console, "width", 80))
                        label = " ⚕ Hermes "
                        if self.show_timestamps:
-                            label = f"{label}{datetime.now().strftime('%H:%M')} "
+                            label = f"{label}{datetime.now().strftime(getattr(self, 'timestamp_format', '%H:%M'))} "
                        fill = w - 2 - HermesCLI._status_bar_display_width(label)
                        _cprint(f"\n{_ACCENT}╭─{label}{'─' * max(fill - 1, 0)}╮{_RST}")
                    _cprint(f"{_STREAM_PAD}{sentence.rstrip()}")
@@ -12498,11 +12389,6 @@ class HermesCLI:
                        "error": _summary,
                    }
                finally:
-                    # Surface any credit notices queued during the turn (cold-start
-                    # seed / per-turn capture) now that the response is done — printing
-                    # at this boundary paints cleanly above the prompt instead of being
-                    # buried behind the streaming output.
-                    self._flush_credit_notices()
                    # Clear thread-local callbacks so a reused thread doesn't
                    # hold stale references to a disposed CLI instance.
                    try:
@@ -13209,16 +13095,6 @@ class HermesCLI:
            _welcome_color = "#FFF8DC"
        self._console_print(f"[{_welcome_color}]{_welcome_text}[/]")

-        # Warm the /model picker's provider-models cache off-thread during this
-        # idle window (banner shown, user about to type). The no-args picker
-        # otherwise blocks ~1-2s on serial /v1/models fetches the first time
-        # it's opened in a session. Fire-and-forget, guarded once-per-process.
-        try:
-            from hermes_cli.model_switch import prewarm_picker_cache_async
-            prewarm_picker_cache_async()
-        except Exception:
-            pass
-
        # Redaction opt-out warning (#17691): ON by default, loud when off.
        # The redactor snapshots its state at import time so any toggle now
        # won't affect the running process — we just want the operator to
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -1366,7 +1366,8 @@ DEFAULT_CONFIG = {
        "bell_on_complete": False,
        "show_reasoning": False,
        "streaming": False,
-        "timestamps": False,      # Show [HH:MM] on user and assistant labels
+        "timestamps": False,      # Show timestamp on user and assistant labels
+        "timestamp_format": "%H:%M",  # strftime format for timestamps (e.g. "%b-%d %H:%M")
        "final_response_markdown": "strip",  # render | strip | raw
        # Preserve recent classic CLI output across Ctrl+L, /redraw, and
        # terminal resize full-screen clears. Disable if a terminal emulator
@@ -1677,9 +1678,9 @@ DEFAULT_CONFIG = {
                                 # "low", "minimal", "none" (empty = inherit parent's level)
        "max_concurrent_children": 3,  # max parallel children per batch; floor of 1 enforced, no ceiling
        # Orchestrator role controls (see tools/delegate_tool.py:_get_max_spawn_depth
-        # and _get_orchestrator_enabled).  Floored at 1, no upper ceiling —
-        # raise deliberately, each level multiplies API cost.
-        "max_spawn_depth": 1,        # depth (1 = flat [default], 2 = orchestrator→leaf, 3+ = deeper)
+        # and _get_orchestrator_enabled).  Values are clamped to [1, 3] with a
+        # warning log if out of range.
+        "max_spawn_depth": 1,        # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level)
        "orchestrator_enabled": True,  # kill switch for role="orchestrator"
        # When a subagent hits a dangerous-command approval prompt, the parent's
        # prompt_toolkit TUI owns stdin — a thread-local input() call from the
@@ -1841,28 +1842,6 @@ DEFAULT_CONFIG = {
        # real memory cost. Default 32 MiB matches the historical hardcoded
        # cap. Set to 0 for no cap. Env override: DISCORD_MAX_ATTACHMENT_BYTES.
        "max_attachment_bytes": 33554432,
-        # Voice-channel audio effects (the continuous mixer). OFF by default.
-        # When enabled, the bot installs a software mixer on the outgoing voice
-        # stream so a low ambient "thinking" bed, verbal acknowledgements, and
-        # TTS replies can OVERLAP (ducking the ambient under speech) instead of
-        # stop-and-swap — the Grok-voice-mode feel. discord.py ships no mixer;
-        # this is implemented in plugins/platforms/discord/voice_mixer.py.
-        "voice_fx": {
-            "enabled": False,         # master switch for the mixer subsystem
-            "ambient_enabled": True,  # play the idle "thinking" bed while tools run
-            "ambient_path": "",       # custom loop audio file; "" = synthesised pad
-            "ambient_gain": 0.18,     # idle bed loudness, 0.0–1.0
-            "duck_gain": 0.06,        # ambient loudness while speech plays
-            "speech_gain": 1.0,       # TTS / ack loudness, 0.0–1.0
-            "ack_enabled": True,      # speak a short phrase before the first tool call
-            "ack_phrases": [          # picked at random; set [] to disable phrases
-                "Let me look into that.",
-                "One moment.",
-                "Checking on that now.",
-                "Give me a sec.",
-                "On it.",
-            ],
-        },
    },

    # WhatsApp platform settings (gateway mode)
@@ -2275,22 +2254,6 @@ DEFAULT_CONFIG = {
        # disable backups entirely, set ``pre_update_backup: false`` above
        # rather than ``backup_keep: 0``.
        "backup_keep": 5,
-        # What `hermes update` does with uncommitted local changes to the
-        # source tree when it runs NON-interactively — i.e. triggered from
-        # the desktop/chat app or the gateway, where there's no TTY to answer
-        # a restore prompt. Interactive (terminal) updates are unaffected:
-        # they always stash the changes and ask whether to restore, exactly
-        # as they always have.
-        #   "stash"   — auto-stash the changes, pull, then auto-restore them
-        #               on top of the updated code (the safe default; nothing
-        #               is ever lost — conflicts are preserved in a git stash).
-        #   "discard" — auto-stash the changes and throw the stash away after
-        #               the pull. Use this only if you never intend to keep
-        #               local edits to the source tree on this machine.
-        #               Stash-and-drop (not `reset --hard` + `clean -fd`) so
-        #               ignored paths — node_modules, venv, build outputs —
-        #               are never touched.
-        "non_interactive_local_changes": "stash",
    },

    # Language Server Protocol — semantic diagnostics from real
@@ -2420,7 +2383,7 @@ DEFAULT_CONFIG = {


    # Config schema version - bump this when adding new required fields
-    "_config_version": 27,
+    "_config_version": 26,
 }

 # =============================================================================
@@ -3975,7 +3938,7 @@ _KNOWN_ROOT_KEYS = {
    "fallback_providers", "credential_pool_strategies", "toolsets",
    "agent", "terminal", "display", "compression", "delegation",
    "auxiliary", "custom_providers", "context", "memory", "gateway",
-    "sessions", "streaming", "updates",
+    "sessions", "streaming",
 }

 # Valid fields inside a custom_providers list entry