diff --git a/cli.py b/cli.py
index 18d61ea772..c76ec217de 100644
--- a/cli.py
+++ b/cli.py
@@ -3114,6 +3114,8 @@ class HermesCLI:
 
         # Collect displayable entries (skip system, tool-result messages)
         entries = []  # list of (role, display_text)
+        _last_asst_idx = None  # index of last assistant entry
+        _last_asst_full = None  # un-truncated display text for last assistant
         for msg in self.conversation_history:
             role = msg.get("role", "")
             content = msg.get("content")
@@ -3143,7 +3145,9 @@
                 text = "" if content is None else str(content)
                 text = _strip_reasoning(text)
                 parts = []
+                full_parts = []  # un-truncated version
                 if text:
+                    full_parts.append(text)
                     lines = text.splitlines()
                     if len(lines) > MAX_ASST_LINES:
                         text = "\n".join(lines[:MAX_ASST_LINES]) + " ..."
@@ -3163,11 +3167,15 @@
                     if len(names) > 4:
                         names_str += ", ..."
                     noun = "call" if tc_count == 1 else "calls"
-                    parts.append(f"[{tc_count} tool {noun}: {names_str}]")
+                    tc_summary = f"[{tc_count} tool {noun}: {names_str}]"
+                    parts.append(tc_summary)
+                    full_parts.append(tc_summary)
                 if not parts:
                     # Skip pure-reasoning messages that have no visible output
                     continue
                 entries.append(("assistant", " ".join(parts)))
+                _last_asst_idx = len(entries) - 1
+                _last_asst_full = " ".join(full_parts)
 
         if not entries:
             return
@@ -3178,6 +3186,13 @@
             skipped = len(entries) - MAX_DISPLAY_EXCHANGES * 2
             entries = entries[skipped:]
 
+        # Replace last assistant entry with full (un-truncated) text
+        # so the user can see where they left off without wasting tokens.
+        if _last_asst_idx is not None and _last_asst_full:
+            adj_idx = _last_asst_idx - skipped
+            if 0 <= adj_idx < len(entries):
+                entries[adj_idx] = ("assistant_last", _last_asst_full)
+
         # Build the display using Rich
         from rich.panel import Panel
         from rich.text import Text
@@ -3210,6 +3225,13 @@
                 lines.append(msg_lines[0] + "\n", style="dim")
                 for ml in msg_lines[1:]:
                     lines.append(f" {ml}\n", style="dim")
+            elif role == "assistant_last":
+                # Last assistant response shown in full, non-dim
+                lines.append(" ◆ Hermes: ", style=f"bold {_assistant_label_c}")
+                msg_lines = text.splitlines()
+                lines.append(msg_lines[0] + "\n", style="")
+                for ml in msg_lines[1:]:
+                    lines.append(f" {ml}\n", style="")
             else:
                 lines.append(" ◆ Hermes: ", style=f"dim bold {_assistant_label_c}")
                 msg_lines = text.splitlines()
diff --git a/tests/cli/test_resume_display.py b/tests/cli/test_resume_display.py
index d0c156d13a..d183e48b2b 100644
--- a/tests/cli/test_resume_display.py
+++ b/tests/cli/test_resume_display.py
@@ -180,33 +180,71 @@ class TestDisplayResumedHistory:
         assert 200 <= a_count <= 310  # roughly 300 chars (±panel padding)
 
     def test_long_assistant_message_truncated(self):
+        """Non-last assistant messages are still truncated."""
         cli = _make_cli()
         long_text = "B" * 400
         cli.conversation_history = [
             {"role": "user", "content": "Tell me a lot."},
             {"role": "assistant", "content": long_text},
+            {"role": "user", "content": "And more?"},
+            {"role": "assistant", "content": "Short final reply."},
         ]
         output = self._capture_display(cli)
-        assert "..." in output
+        # The non-last assistant message should be truncated
         assert "B" * 400 not in output
+        # The last assistant message shown in full
+        assert "Short final reply." in output
 
     def test_multiline_assistant_truncated(self):
+        """Non-last multiline assistant messages are truncated to 3 lines."""
         cli = _make_cli()
         multi = "\n".join([f"Line {i}" for i in range(20)])
         cli.conversation_history = [
             {"role": "user", "content": "Show me lines."},
             {"role": "assistant", "content": multi},
+            {"role": "user", "content": "What else?"},
+            {"role": "assistant", "content": "Done."},
         ]
         output = self._capture_display(cli)
-        # First 3 lines should be there
+        # First 3 lines of non-last assistant should be there
         assert "Line 0" in output
         assert "Line 1" in output
         assert "Line 2" in output
-        # Line 19 should NOT be there (truncated after 3 lines)
+        # Line 19 should NOT be in the truncated message
         assert "Line 19" not in output
 
+    def test_last_assistant_response_shown_in_full(self):
+        """The last assistant response is shown un-truncated so the user
+        knows where they left off without wasting tokens re-asking."""
+        cli = _make_cli()
+        long_text = "X" * 500
+        cli.conversation_history = [
+            {"role": "user", "content": "Tell me everything."},
+            {"role": "assistant", "content": long_text},
+        ]
+        output = self._capture_display(cli)
+
+        # Full 500-char text should be present (may be line-wrapped by Rich)
+        x_count = output.count("X")
+        assert x_count >= 490  # allow small Rich formatting variance
+
+    def test_last_assistant_multiline_shown_in_full(self):
+        """The last assistant response shows all lines, not just 3."""
+        cli = _make_cli()
+        multi = "\n".join([f"Line {i}" for i in range(20)])
+        cli.conversation_history = [
+            {"role": "user", "content": "Show me everything."},
+            {"role": "assistant", "content": multi},
+        ]
+        output = self._capture_display(cli)
+
+        # All 20 lines should be present since it's the last response
+        assert "Line 0" in output
+        assert "Line 10" in output
+        assert "Line 19" in output
+
     def test_large_history_shows_truncation_indicator(self):
         cli = _make_cli()
         cli.conversation_history = _large_history(n_exchanges=15)