hermes_cli/voice.py

"""Process-wide voice recording + TTS API for the TUI gateway.

Wraps ``tools.voice_mode`` (recording/transcription) and ``tools.tts_tool``
(text-to-speech) behind idempotent, stateful entry points that the gateway's
``voice.record``, ``voice.toggle``, and ``voice.tts`` JSON-RPC handlers can
call from a dedicated thread. The gateway imports this module lazily so that
missing optional audio deps (sounddevice, faster-whisper, numpy) surface as
an ``ImportError`` at call time, not at startup.

Two usage modes are exposed:

* **Push-to-talk** (``start_recording`` / ``stop_and_transcribe``) — single
  manually-bounded capture used when the caller drives the start/stop pair
  explicitly.
* **Continuous (VAD)** (``start_continuous`` / ``stop_continuous``) — mirrors
  the classic CLI voice mode: recording auto-stops on silence, transcribes,
  hands the result to a callback, and then auto-restarts for the next turn.
  Three consecutive no-speech cycles stop the loop and fire
  ``on_silent_limit`` so the UI can turn the mode off.
"""

from __future__ import annotations

import logging
import os
import sys
import threading
from typing import Any, Callable, Optional

# Modifier spellings accepted in ``voice.record_key`` and the
# prompt_toolkit prefix each one normalizes to. Kept in step with the
# ``_MOD_ALIASES`` table of the TUI parser
# (``ui-tui/src/lib/platform.ts``) — the shared contract that removed
# the cross-runtime mismatch Copilot flagged in round-9 on #19835.
#
# ``super`` / ``win`` / ``windows`` are deliberately missing:
# prompt_toolkit has no super/meta modifier for the Cmd key, so those
# spellings are TUI-only. The normalizer below answers them with the
# documented default (``c-b``) — a silent fallback was preferred to a
# hard startup crash (Copilot round-11). The CLI binding site
# (``_register_voice_handler`` in cli.py) logs a warning when that
# fallback fires so users see why their TUI-only shortcut isn't bound
# in the classic CLI.
_VOICE_MOD_ALIASES = {
    **dict.fromkeys(("ctrl", "control"), "c-"),
    **dict.fromkeys(("alt", "option", "opt"), "a-"),
}

# Named keys prompt_toolkit accepts in ``c-<name>`` / ``a-<name>`` form.
# Each row is (canonical prompt_toolkit spelling, accepted spellings);
# every accepted spelling collapses to the canonical one so the same
# config value binds identically in both runtimes (Copilot round-10 on
# #19835).
_VOICE_NAMED_KEYS = {
    spelling: canonical
    for canonical, spellings in (
        ("space", ("space", "spc")),
        ("enter", ("enter", "return", "ret")),
        ("tab", ("tab",)),
        ("escape", ("escape", "esc")),
        ("backspace", ("backspace", "bs")),
        ("delete", ("delete", "del")),
    )
    for spelling in spellings
}

# ``useInputHandlers()`` intercepts these before the voice check runs,
# so a binding like ``ctrl+c`` (interrupt), ``ctrl+d`` (quit), or
# ``ctrl+l`` (clear screen) would be advertised in /voice status but
# never fire push-to-talk — the same blocklist the TUI parser uses.
_VOICE_RESERVED_CTRL_CHARS = frozenset("cdl")

# On macOS the classic CLI's prompt_toolkit bindings for copy / exit /
# clear also claim ``a-c`` / ``a-d`` / ``a-l`` via the action-modifier
# lookup, and hermes-ink reports Alt as ``key.meta`` on many terminals.
# Mirror the TUI parser's darwin-only reservation so ``option+c`` etc.
# don't bind Alt+C in the CLI while the TUI silently falls back to
# Ctrl+B (Copilot round-14 on #19835).
_VOICE_RESERVED_ALT_CHARS_MAC = frozenset("cdl")

# Binding returned whenever the configured value cannot be used.
_DEFAULT_PT_KEY = "c-b"


def voice_record_key_from_config(cfg: Any) -> Any:
    """Read ``cfg["voice"]["record_key"]`` without trusting either level's shape.

    ``load_config()`` deep-merges raw YAML and keeps scalar overrides, so
    a hand-edited ``voice: true`` / ``voice: cmd+b`` leaves
    ``cfg["voice"]`` as a bool/str rather than a dict. The naive
    ``.get("voice", {}).get("record_key")`` chain then raises
    AttributeError before voice can even start (Copilot round-11 on
    #19835).

    Returns:
        The stored ``record_key`` value (of whatever type the config
        holds), or ``None`` when ``cfg`` or its ``voice`` entry is not a
        dict or the key is absent. ``None`` can be passed straight to
        the normalizer/formatter, which map it to the documented default.
    """
    voice_section = cfg.get("voice") if isinstance(cfg, dict) else None
    if isinstance(voice_section, dict):
        return voice_section.get("record_key")
    return None


def normalize_voice_record_key_for_prompt_toolkit(raw: Any) -> str:
    """Translate ``voice.record_key`` into prompt_toolkit's ``c-x`` / ``a-x`` form.

    Follows the TUI parser contract (``ui-tui/src/lib/platform.ts``) so a
    single config value resolves to the same shortcut in both runtimes.
    Anything that is not exactly ``<modifier>+<key>`` with a supported
    modifier and key yields the documented default ``c-b``:

    * non-string, empty, or whitespace-only values
    * a bare key with no modifier, or more than one modifier
    * unknown modifiers, including the TUI-only ``super`` / ``win`` /
      ``windows`` (prompt_toolkit has no super modifier; the CLI binding
      site is expected to warn when this fallback fires, Copilot
      round-11 on #19835)
    * reserved chords: ``ctrl+c|d|l`` everywhere, ``alt+c|d|l`` on macOS
    * multi-character keys that are not a known named key (typos such
      as ``ctrl+spcae``)

    Accepted values: ``ctrl+o`` → ``c-o``; ``ctrl+space`` → ``c-space``;
    named-key aliases collapse (``ctrl+return`` → ``c-enter``).

    Matching ignores case and whitespace around each ``+``-separated
    token; empty tokens are dropped before counting.
    """
    if not isinstance(raw, str):
        return _DEFAULT_PT_KEY

    pieces = (piece.strip() for piece in raw.strip().lower().split("+"))
    tokens = [piece for piece in pieces if piece]

    # Exactly one modifier plus one key is the only accepted shape. A
    # bare key would bind unmodified in prompt_toolkit (the TUI parser
    # refuses it), and multi-modifier chords such as ``ctrl+alt+r`` bind
    # a different shortcut in prompt_toolkit while hermes-ink rejects
    # them — fall back rather than let the runtimes silently diverge.
    if len(tokens) != 2:
        return _DEFAULT_PT_KEY

    modifier, key = tokens
    prefix = _VOICE_MOD_ALIASES.get(modifier)

    # ``super`` / ``win`` / ``windows`` are TUI-only: ``@kb.add(super+b)``
    # crashes the CLI at startup, so they are rejected explicitly, along
    # with any modifier missing from the alias table.
    if modifier in ("super", "win", "windows") or not prefix:
        return _DEFAULT_PT_KEY

    if len(key) > 1:
        # Multi-char tokens must be a known named key; a typo passed
        # through as e.g. ``c-spcae`` would be rejected by prompt_toolkit.
        canonical = _VOICE_NAMED_KEYS.get(key)
        return f"{prefix}{canonical}" if canonical else _DEFAULT_PT_KEY

    # Single character: refuse the chords the TUI also blocks at parse
    # time, plus the mac-only Alt reservation.
    reserved_ctrl = prefix == "c-" and key in _VOICE_RESERVED_CTRL_CHARS
    reserved_alt_on_mac = (
        prefix == "a-"
        and sys.platform == "darwin"
        and key in _VOICE_RESERVED_ALT_CHARS_MAC
    )
    if reserved_ctrl or reserved_alt_on_mac:
        return _DEFAULT_PT_KEY

    return prefix + key


def format_voice_record_key_for_status(raw: Any) -> str:
    """Render ``voice.record_key`` for ``/voice status`` in CLI-friendly form.

    Mirrors the TUI's ``formatVoiceRecordKey``: returns ``Ctrl+B`` /
    ``Alt+Space`` / ``Ctrl+Enter``. The value is first run through
    ``normalize_voice_record_key_for_prompt_toolkit``, so malformed,
    reserved, or TUI-only (``super`` / ``win``) configs surface as the
    documented default and status never advertises a shortcut that
    won't bind (Copilot round-10 on #19835).

    Args:
        raw: The configured ``voice.record_key`` value; any type is
            accepted.

    Returns:
        ``Ctrl+<Key>`` or ``Alt+<Key>`` with the first character of the
        key upper-cased.
    """
    normalized = normalize_voice_record_key_for_prompt_toolkit(raw)

    # The normalizer only ever yields ``c-<key>`` or ``a-<key>`` with a
    # non-empty key, so the two-character prefix selects the label.
    labels = {"c-": "Ctrl+", "a-": "Alt+"}
    label = labels.get(normalized[:2])
    key = normalized[2:]

    if label is None or not key:
        # Defensive only: not reachable with the current normalizer, but
        # status must still show the shortcut that actually binds.
        return "Ctrl+B"

    # Capitalize the first character only: ``b`` → ``B``, ``space`` → ``Space``.
    return label + key[0].upper() + key[1:]


from tools.voice_mode import (
    create_audio_recorder,
    is_whisper_hallucination,
    play_audio_file,
    transcribe_recording,
)

logger = logging.getLogger(__name__)


def _debug(msg: str) -> None:
    """Write ``[voice] <msg>`` to stderr when ``HERMES_VOICE_DEBUG=1``.

    stderr is used because the TUI gateway wraps it as a gateway.stderr
    event, which createGatewayEventHandler shows as an Activity line —
    enough to diagnose "why didn't the loop auto-restart?" in the user's
    real terminal without shipping a separate debug RPC.

    OSError / BrokenPipeError are swallowed: this is called from
    background threads (silence callback, TTS daemon, beep), where a
    broken stderr pipe must not kill the whole gateway — the main
    command pipe (stdin+stdout) is what actually matters.
    """
    enabled = os.environ.get("HERMES_VOICE_DEBUG", "").strip() == "1"
    if enabled:
        try:
            print(f"[voice] {msg}", file=sys.stderr, flush=True)
        except (BrokenPipeError, OSError):
            pass


def _beeps_enabled() -> bool:
    """Report whether audible cues are on (``voice.beep_enabled``, default True).

    CLI parity: the flag is read from config.yaml through
    ``hermes_cli.config.load_config``. Every failure mode — the config
    module failing to import, loading raising, or ``voice`` not being a
    dict — resolves to ``True`` so beeps stay on unless they were
    explicitly disabled.
    """
    enabled = True
    try:
        from hermes_cli.config import load_config

        section = load_config().get("voice", {})
        if isinstance(section, dict):
            enabled = bool(section.get("beep_enabled", True))
    except Exception:
        # Best-effort lookup: an unreadable config must not silence or
        # break the voice loop.
        enabled = True
    return enabled


def _play_beep(frequency: int, count: int = 1) -> None:
    """Play an audible cue matching cli.py's record/stop beeps.

    Callers in this module use 880 Hz × 1 when recording starts
    (cli.py:_voice_start_recording) and 660 Hz × 2 when it stops
    (cli.py:_voice_stop_and_transcribe).

    Args:
        frequency: Tone frequency in Hz, forwarded to
            ``tools.voice_mode.play_beep``.
        count: Number of beeps, forwarded likewise.

    Does nothing when beeps are disabled in config. Best-effort: any
    failure while importing or playing is reduced to a debug breadcrumb
    so the voice loop never breaks because a speaker was unavailable.
    """
    if _beeps_enabled():
        try:
            from tools.voice_mode import play_beep

            play_beep(frequency=frequency, count=count)
        except Exception as exc:
            _debug(f"beep {frequency}Hz failed: {exc}")

# ── Push-to-talk state ───────────────────────────────────────────────
# ``_recorder`` holds the recorder of the push-to-talk capture in
# progress (``None`` when idle); it is only swapped under
# ``_recorder_lock``.
_recorder_lock = threading.Lock()
_recorder = None

# ── Continuous (VAD) state ───────────────────────────────────────────
# Everything below is read and written under ``_continuous_lock``.
# Consecutive no-speech cycles after which the loop halts itself.
_CONTINUOUS_NO_SPEECH_LIMIT = 3
_continuous_lock = threading.Lock()
_continuous_active = False
# Created on the first ``start_continuous`` call and reused afterwards.
_continuous_recorder: Any = None
_continuous_no_speech_count = 0
# Callbacks registered by ``start_continuous``; cleared by ``stop_continuous``.
_continuous_on_transcript: Optional[Callable[[str], None]] = None
_continuous_on_status: Optional[Callable[[str], None]] = None
_continuous_on_silent_limit: Optional[Callable[[], None]] = None

# ── TTS-vs-STT feedback guard ────────────────────────────────────────
# When TTS plays the agent reply over the speakers, the live microphone
# picks it up and transcribes the agent's own voice as user input — an
# infinite loop the agent happily joins ("Ha, looks like we're in a loop").
# This Event mirrors cli.py:_voice_tts_done: cleared while speak_text is
# playing, set while silent. _continuous_on_silence waits on it before
# re-arming the recorder, and speak_text itself cancels any live capture
# before starting playback so the tail of the previous utterance doesn't
# leak into the mic.
_tts_playing = threading.Event()
_tts_playing.set()  # initially "not playing"


# ── Push-to-talk API ─────────────────────────────────────────────────


def start_recording() -> None:
    """Open a push-to-talk capture via a freshly created audio recorder.

    Idempotent — if the stored recorder reports ``is_recording``, the
    call returns without touching it. Otherwise a new recorder is
    created, started, and only then stored, so a failure in either step
    propagates to the caller and leaves the previous state in place.
    """
    global _recorder

    with _recorder_lock:
        already_capturing = _recorder is not None and getattr(
            _recorder, "is_recording", False
        )
        if not already_capturing:
            fresh = create_audio_recorder()
            fresh.start()
            _recorder = fresh


def stop_and_transcribe() -> Optional[str]:
    """End the push-to-talk capture and return the transcribed text.

    The stored recorder is detached under the lock first, so a second
    caller sees "nothing active". The recording file returned by
    ``stop()`` is deleted (best-effort) whether or not transcription
    succeeds.

    Returns:
        The stripped transcript, or ``None`` when no recording was
        active, ``stop()`` produced no file, transcription raised or
        reported failure, the transcript was empty, or it matched a
        known Whisper hallucination.
    """
    global _recorder

    with _recorder_lock:
        active, _recorder = _recorder, None

    wav_path = active.stop() if active is not None else None
    if not wav_path:
        return None

    try:
        outcome = transcribe_recording(wav_path)
    except Exception as exc:
        logger.warning("voice transcription failed: %s", exc)
        return None
    finally:
        # The recording is a throwaway temp artifact; never let cleanup
        # problems mask the transcription outcome.
        try:
            if os.path.isfile(wav_path):
                os.unlink(wav_path)
        except Exception:
            pass

    # transcribe_recording returns {"success": bool, "transcript": str, ...}
    # — matches cli.py:_voice_stop_and_transcribe's result.get("transcript").
    if not outcome.get("success"):
        return None

    spoken = (outcome.get("transcript") or "").strip()
    usable = bool(spoken) and not is_whisper_hallucination(spoken)
    return spoken if usable else None


# ── Continuous (VAD) API ─────────────────────────────────────────────


def start_continuous(
    on_transcript: Callable[[str], None],
    on_status: Optional[Callable[[str], None]] = None,
    on_silent_limit: Optional[Callable[[], None]] = None,
    silence_threshold: int = 200,
    silence_duration: float = 3.0,
) -> None:
    """Start a VAD-driven continuous recording loop.

    The loop calls ``on_transcript(text)`` each time speech is detected and
    transcribed successfully, then auto-restarts. After
    ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech
    picked up at all) the loop stops itself and calls ``on_silent_limit``
    so the UI can reflect "voice off". Idempotent — calling while already
    active is a no-op.

    Args:
        on_transcript: Receives each accepted transcript.
        on_status: Optional; called with ``"listening"`` /
            ``"transcribing"`` / ``"idle"`` so the UI can show a live
            indicator. Exceptions it raises are ignored.
        on_silent_limit: Optional; called when the loop halts itself.
        silence_threshold: Stored on the recorder as
            ``_silence_threshold`` (presumably an RMS level — confirm
            against ``tools.voice_mode``).
        silence_duration: Stored on the recorder as
            ``_silence_duration``, in seconds.

    Raises:
        Exception: Whatever ``create_audio_recorder()`` or the
            recorder's ``start()`` raises. In both cases the loop is
            left inactive so a later call can retry.
    """
    global _continuous_active, _continuous_recorder
    global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
    global _continuous_no_speech_count

    with _continuous_lock:
        if _continuous_active:
            _debug("start_continuous: already active — no-op")
            return

        # Obtain and configure the recorder BEFORE flipping any loop
        # state: if creation raises, the module must not be left marked
        # "active" with nothing recording, which would turn every later
        # start into a silent no-op.
        if _continuous_recorder is None:
            _continuous_recorder = create_audio_recorder()
        rec = _continuous_recorder
        rec._silence_threshold = silence_threshold
        rec._silence_duration = silence_duration

        _continuous_active = True
        _continuous_on_transcript = on_transcript
        _continuous_on_status = on_status
        _continuous_on_silent_limit = on_silent_limit
        _continuous_no_speech_count = 0

    _debug(
        f"start_continuous: begin (threshold={silence_threshold}, duration={silence_duration}s)"
    )

    # CLI parity: single 880 Hz beep *before* opening the stream — placing
    # the beep after stream.start() on macOS triggers a CoreAudio conflict
    # (cli.py:7528 comment).
    _play_beep(frequency=880, count=1)

    try:
        rec.start(on_silence_stop=_continuous_on_silence)
    except Exception as e:
        logger.error("failed to start continuous recording: %s", e)
        _debug(f"start_continuous: rec.start raised {type(e).__name__}: {e}")
        with _continuous_lock:
            _continuous_active = False
        raise

    if on_status:
        try:
            on_status("listening")
        except Exception:
            # Status reporting is cosmetic; a broken UI callback must not
            # fail a recording that has already started.
            pass


def stop_continuous() -> None:
    """Shut down the continuous loop and release the microphone.

    Idempotent — returns immediately when no loop is active. Otherwise
    the loop is marked inactive and its callbacks and silent-cycle
    counter are reset under the lock; then, outside the lock, the
    recorder is cancelled, the stop cue is played, and the status
    callback captured beforehand is told ``"idle"``.

    Any in-flight transcription completes but its result is discarded
    (the silence callback checks ``_continuous_active`` before firing).
    """
    global _continuous_active, _continuous_no_speech_count
    global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit

    with _continuous_lock:
        if not _continuous_active:
            return
        recorder, notify = _continuous_recorder, _continuous_on_status
        _continuous_active = False
        _continuous_on_transcript = _continuous_on_status = _continuous_on_silent_limit = None
        _continuous_no_speech_count = 0

    try:
        if recorder is not None:
            # cancel() (not stop()) discards buffered frames — the loop
            # is over, we don't want to transcribe a half-captured turn.
            recorder.cancel()
    except Exception as exc:
        logger.warning("failed to cancel recorder: %s", exc)

    # Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the
    # silence-auto-stop path plays).
    _play_beep(frequency=660, count=2)

    if not notify:
        return
    try:
        notify("idle")
    except Exception:
        pass


def is_continuous_active() -> bool:
    """Return the continuous-loop "active" flag, read under its lock."""
    with _continuous_lock:
        active = _continuous_active
    return active


def _continuous_on_silence() -> None:
    """AudioRecorder silence callback — runs in a daemon thread.

    Stops the current capture, transcribes, delivers the text via
    ``on_transcript``, and — if the loop is still active — starts the
    next capture. ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive cycles
    without a usable transcript end the loop and fire
    ``on_silent_limit``.

    The loop can be stopped by another thread at any point, so the
    active flag is re-read under the lock after transcription and again
    immediately before the microphone is re-armed. Whenever this
    function itself takes the loop down (silent limit or a failed
    restart) it reports ``"idle"`` through ``on_status``.
    """
    global _continuous_active, _continuous_no_speech_count

    _debug("_continuous_on_silence: fired")

    with _continuous_lock:
        if not _continuous_active:
            _debug("_continuous_on_silence: loop inactive — abort")
            return
        rec = _continuous_recorder
        on_transcript = _continuous_on_transcript
        on_status = _continuous_on_status
        on_silent_limit = _continuous_on_silent_limit

    if rec is None:
        _debug("_continuous_on_silence: no recorder — abort")
        return

    def notify(status: str) -> None:
        # Status updates are cosmetic: a failing UI callback must never
        # take down the recording thread.
        if on_status:
            try:
                on_status(status)
            except Exception:
                pass

    notify("transcribing")

    wav_path = rec.stop()
    # Peak RMS is the critical diagnostic when stop() returns None despite
    # the VAD firing — tells us at a glance whether the mic was too quiet
    # for SILENCE_RMS_THRESHOLD (200) or the VAD + peak checks disagree.
    peak_rms = getattr(rec, "_peak_rms", -1)
    _debug(
        f"_continuous_on_silence: rec.stop -> {wav_path!r} (peak_rms={peak_rms})"
    )

    # CLI parity: double 660 Hz beep after the stream stops (safe from the
    # CoreAudio conflict that blocks pre-start beeps).
    _play_beep(frequency=660, count=2)

    transcript: Optional[str] = None

    if wav_path:
        try:
            result = transcribe_recording(wav_path)
            # transcribe_recording returns {"success": bool, "transcript": str,
            # "error": str?} — NOT {"text": str}.  Using the wrong key silently
            # produced empty transcripts even when Groq/local STT returned fine,
            # which masqueraded as "not hearing the user" to the caller.
            success = bool(result.get("success"))
            text = (result.get("transcript") or "").strip()
            err = result.get("error")
            _debug(
                f"_continuous_on_silence: transcribe -> success={success} "
                f"text={text!r} err={err!r}"
            )
            if success and text and not is_whisper_hallucination(text):
                transcript = text
        except Exception as e:
            logger.warning("continuous transcription failed: %s", e)
            _debug(f"_continuous_on_silence: transcribe raised {type(e).__name__}: {e}")
        finally:
            # The recording is a throwaway file; remove it regardless of
            # how transcription went.
            try:
                if os.path.isfile(wav_path):
                    os.unlink(wav_path)
            except Exception:
                pass

    with _continuous_lock:
        if not _continuous_active:
            # User stopped us while we were transcribing — discard.
            _debug("_continuous_on_silence: stopped during transcribe — no restart")
            return
        if transcript:
            _continuous_no_speech_count = 0
        else:
            _continuous_no_speech_count += 1
        should_halt = _continuous_no_speech_count >= _CONTINUOUS_NO_SPEECH_LIMIT
        no_speech = _continuous_no_speech_count

    if transcript and on_transcript:
        try:
            on_transcript(transcript)
        except Exception as e:
            logger.warning("on_transcript callback raised: %s", e)

    if should_halt:
        _debug(f"_continuous_on_silence: {no_speech} silent cycles — halting")
        with _continuous_lock:
            _continuous_active = False
            _continuous_no_speech_count = 0
        if on_silent_limit:
            try:
                on_silent_limit()
            except Exception:
                pass
        try:
            rec.cancel()
        except Exception:
            pass
        notify("idle")
        return

    # CLI parity (cli.py:10619-10621): wait for any in-flight TTS to
    # finish before re-arming the mic, then leave a small gap to avoid
    # catching the tail of the speaker output.  Without this the voice
    # loop becomes a feedback loop — the agent's spoken reply lands
    # back in the mic and gets re-submitted.
    if not _tts_playing.is_set():
        _debug("_continuous_on_silence: waiting for TTS to finish")
        _tts_playing.wait(timeout=60)
        import time as _time
        _time.sleep(0.3)

    # The loop may have been stopped while on_transcript ran or while we
    # waited for TTS; always re-check so a stopped loop never re-opens
    # the microphone.
    with _continuous_lock:
        if not _continuous_active:
            _debug("_continuous_on_silence: stopped before restart — not re-arming")
            return

    # Restart for the next turn.
    _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
    _play_beep(frequency=880, count=1)
    try:
        rec.start(on_silence_stop=_continuous_on_silence)
    except Exception as e:
        logger.error("failed to restart continuous recording: %s", e)
        _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
        with _continuous_lock:
            _continuous_active = False
        # The loop is over; don't leave the UI showing "transcribing".
        notify("idle")
        return

    notify("listening")


# ── TTS API ──────────────────────────────────────────────────────────


def _strip_markdown_for_tts(text: str) -> str:
    """Cap ``text`` at 4000 characters and drop markup that reads badly aloud."""
    import re

    # (pattern, replacement, flags) — applied in this exact order.
    substitutions = (
        (r'```[\s\S]*?```', ' ', 0),                   # fenced code blocks
        (r'\[([^\]]+)\]\([^)]+\)', r'\1', 0),          # [text](url) → text
        (r'https?://\S+', '', 0),                      # bare URLs
        (r'\*\*(.+?)\*\*', r'\1', 0),                  # bold
        (r'\*(.+?)\*', r'\1', 0),                      # italic
        (r'`(.+?)`', r'\1', 0),                        # inline code
        (r'^#+\s*', '', re.MULTILINE),                 # headers
        (r'^\s*[-*]\s+', '', re.MULTILINE),            # list bullets
        (r'---+', '', 0),                              # horizontal rules
        (r'\n{3,}', '\n\n', 0),                        # excess newlines
    )

    spoken = text[:4000]
    for pattern, replacement, flags in substitutions:
        spoken = re.sub(pattern, replacement, spoken, flags=flags)
    return spoken.strip()


def speak_text(text: str) -> None:
    """Synthesize ``text`` with the configured TTS provider and play it.

    Follows cli.py:_voice_speak_response — same markdown strip pipeline,
    same 4000-char cap, same explicit mp3 output path, same
    MP3-over-OGG playback choice (afplay misbehaves on OGG) — so a
    voice-mode TTS session in the TUI sounds identical to one in the
    classic CLI. The generated ``.mp3`` and any sibling ``.ogg`` are
    removed afterwards, including when synthesis or playback fails.

    While playback is in flight the module-level _tts_playing Event is
    cleared so the continuous-recording loop knows to wait before
    re-arming the mic (otherwise the agent's spoken reply feedback-loops
    through the microphone and the agent ends up replying to itself).

    Args:
        text: Reply to speak. Empty / whitespace-only input, or input
            that is empty after markdown stripping, plays nothing.

    Never raises for synthesis or playback problems; they are logged as
    a warning.
    """
    if not text or not text.strip():
        return

    import tempfile
    import time

    # Cancel any live capture before we open the speakers — otherwise the
    # last ~200ms of the user's turn tail + the first syllables of our TTS
    # both end up in the next recording window.  The continuous loop will
    # re-arm itself after _tts_playing flips back (see _continuous_on_silence).
    paused_recording = False
    with _continuous_lock:
        if (
            _continuous_active
            and _continuous_recorder is not None
            and getattr(_continuous_recorder, "is_recording", False)
        ):
            try:
                _continuous_recorder.cancel()
                paused_recording = True
            except Exception as e:
                logger.warning("failed to pause recorder for TTS: %s", e)

    _tts_playing.clear()
    _debug(f"speak_text: TTS begin (paused_recording={paused_recording})")

    # Set once the output path is chosen so the cleanup in ``finally``
    # knows whether there can be anything on disk to remove.
    mp3_path = None
    try:
        from tools.tts_tool import text_to_speech_tool

        tts_text = _strip_markdown_for_tts(text)
        if not tts_text:
            return

        # MP3 output path, pre-chosen so we can play the MP3 directly even
        # when text_to_speech_tool auto-converts to OGG for messaging
        # platforms.  afplay's OGG support is flaky, MP3 always works.
        out_dir = os.path.join(tempfile.gettempdir(), "hermes_voice")
        os.makedirs(out_dir, exist_ok=True)
        mp3_path = os.path.join(
            out_dir,
            f"tts_{time.strftime('%Y%m%d_%H%M%S')}.mp3",
        )

        _debug(f"speak_text: synthesizing {len(tts_text)} chars -> {mp3_path}")
        text_to_speech_tool(text=tts_text, output_path=mp3_path)

        if os.path.isfile(mp3_path) and os.path.getsize(mp3_path) > 0:
            _debug(f"speak_text: playing {mp3_path} ({os.path.getsize(mp3_path)} bytes)")
            play_audio_file(mp3_path)
        else:
            _debug(f"speak_text: TTS tool produced no audio at {mp3_path}")
    except Exception as e:
        logger.warning("Voice TTS playback failed: %s", e)
        _debug(f"speak_text raised {type(e).__name__}: {e}")
    finally:
        _tts_playing.set()
        _debug("speak_text: TTS done")

        # Remove the synthesized audio on every exit path. Each file is
        # handled on its own so a failure on one doesn't strand the other.
        if mp3_path is not None:
            ogg_path = mp3_path.rsplit(".", 1)[0] + ".ogg"
            for leftover in (mp3_path, ogg_path):
                try:
                    os.unlink(leftover)
                except OSError:
                    pass

        # Re-arm the mic so the user can answer without pressing Ctrl+B.
        # Small delay lets the OS flush speaker output and afplay fully
        # release the audio device before sounddevice re-opens the input.
        if paused_recording:
            time.sleep(0.3)
            with _continuous_lock:
                if _continuous_active and _continuous_recorder is not None:
                    try:
                        _continuous_recorder.start(
                            on_silence_stop=_continuous_on_silence
                        )
                        _debug("speak_text: recording resumed after TTS")
                    except Exception as e:
                        logger.warning(
                            "failed to resume recorder after TTS: %s", e
                        )
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
+								"""Process-wide voice recording + TTS API for the TUI gateway.
 								Wraps ``tools.voice_mode`` (recording/transcription) and ``tools.tts_tool``
 								(text-to-speech) behind idempotent, stateful entry points that the gateway's
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								``voice.record``, ``voice.toggle``, and ``voice.tts`` JSON-RPC handlers can
 								call from a dedicated thread. The gateway imports this module lazily so that
 								missing optional audio deps (sounddevice, faster-whisper, numpy) surface as
 								an ``ImportError`` at call time, not at startup.
 								Two usage modes are exposed:
 								* **Push-to-talk** (``start_recording`` / ``stop_and_transcribe``) — single
 								  manually-bounded capture used when the caller drives the start/stop pair
 								  explicitly.
 								* **Continuous (VAD)** (``start_continuous`` / ``stop_continuous``) — mirrors
 								  the classic CLI voice mode: recording auto-stops on silence, transcribes,
 								  hands the result to a callback, and then auto-restarts for the next turn.
 								  Three consecutive no-speech cycles stop the loop and fire
 								  ``on_silent_limit`` so the UI can turn the mode off.
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
+								"""
 								from __future__ import annotations
 								import logging
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								import os
 								import sys
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
+								import threading
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								from typing import Any, Callable, Optional
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
-												fix(tui): respect voice.record_key config (supersedes #19028, #19339) (#19835)

* fix(tui): respect voice.record_key config instead of hardcoded Ctrl+B

Classic CLI loaded ``voice.record_key`` from config.yaml and bound the
prompt-toolkit handler dynamically (``cli.py`` paths). The new TUI hard-
coded ``Ctrl+B`` everywhere — ``isVoiceToggleKey`` (input handler),
``/voice status`` ("Record key: Ctrl+B"), and ``/voice on`` ("Ctrl+B to
start/stop recording"). A user who set ``voice.record_key: ctrl+o``
(or any other key) saw the documented config silently ignored — only
Ctrl+B worked, the displayed shortcut lied about it.

Wire the configured key end to end through the existing channels:

* **Backend** (``tui_gateway/server.py``): ``voice.toggle`` action=status
  AND action=on/off responses now include ``record_key``, sourced from
  ``config.get('voice', {}).get('record_key', 'ctrl+b')``.
* **Backend types** (``ui-tui/src/gatewayTypes.ts``): ``ConfigFullResponse``
  now exposes ``config.voice.record_key`` and ``VoiceToggleResponse``
  carries ``record_key`` so the TUI can both bind and display it.
* **Frontend parser/formatter** (``ui-tui/src/lib/platform.ts``):
  ``parseVoiceRecordKey()`` accepts ``ctrl+b`` / ``alt+r`` / ``cmd+space``
  and the common aliases (``option``, ``cmd``, ``win``, …); falls back to
  the documented Ctrl+B for empty / multi-character / malformed input so
  a typo never silently disables the shortcut. ``formatVoiceRecordKey()``
  renders for status text. ``isVoiceToggleKey`` now takes a parsed
  ``ParsedVoiceRecordKey`` argument; the hardcoded ``ch === 'b'`` is
  gone. Default arg keeps existing call sites back-compat.
* **Hydration** (``ui-tui/src/app/useConfigSync.ts``,
  ``useMainApp.ts``): startup ``config.get full`` already runs; extract
  ``cfg.voice.record_key`` from it, parse, push into a new
  ``voiceRecordKey`` state, and forward to the input handler ctx
  (``InputHandlerContext.voice.recordKey``). Mtime-poll path also
  re-applies the parsed key so a hand-edit of config.yaml takes effect
  the next tick — matches existing behaviour for display options.
* **Input handler** (``ui-tui/src/app/useInputHandlers.ts``):
  ``isVoiceToggleKey(key, ch, voice.recordKey)`` so the configured
  binding fires.
* **Slash command** (``ui-tui/src/app/slash/commands/session.ts``):
  ``/voice status`` and ``/voice on`` use ``formatVoiceRecordKey`` on
  the response's ``record_key`` instead of the hardcoded label.

Tests:
* ``parseVoiceRecordKey`` covers ctrl/alt/cmd/super aliases, multi-char
  rejection, and empty fallback.
* ``formatVoiceRecordKey`` covers the doc examples (``Ctrl+B``,
  ``Ctrl+O``, ``Alt+R``, ``Cmd+B``).
* ``isVoiceToggleKey`` regression: ``ctrl+o`` configured → only ``o``
  matches, not ``b``; ``alt+r`` matches both alt-bit and meta-bit
  encodings (terminal protocol parity); omitted-arg call still binds
  Ctrl+B for back-compat.

Full TUI suite (555 tests) passes; ``tsc --noEmit`` clean.

Fixes #18994

Co-authored-by: asheriif <ahmedsherif95@gmail.com>

* fix(tui): support named-key tokens in voice.record_key (space, enter, …)

Reviewer caught that the round-1 parser in #18994 rejected every
multi-character token, so a config value like ``ctrl+space`` (which the
CLI happily binds via prompt_toolkit's ``c-space`` rewrite in
``cli.py``) silently fell back to the documented Ctrl+B default —
re-introducing the same false-shortcut bug the PR was meant to fix,
just at a different surface.

Add explicit named-key support that mirrors what the CLI accepts:

* ``space``         (alias: ``spc``)        → matches ``ch === ' '``
* ``enter``         (alias: ``return``, ``ret``) → matches ``key.return``
* ``tab``                                   → matches ``key.tab``
* ``escape``        (alias: ``esc``)        → matches ``key.escape``
* ``backspace``     (alias: ``bs``)         → matches ``key.backspace``
* ``delete``        (alias: ``del``)        → matches ``key.delete``

``ParsedVoiceRecordKey`` gains an optional ``named`` field; ``ch``
holds either a single char (back-compat) or the canonical named token,
and the runtime matcher dispatches on ``named`` before checking the
modifier shape. Aliases collapse to one canonical name so
``ctrl+esc`` and ``ctrl+escape`` behave identically.

Unrecognised multi-character tokens (e.g. ``ctrl+spcae`` typo, or
unsupported keys like ``ctrl+f5``) still fall back to the Ctrl+B
default rather than silently disabling the binding — keeps the "typo
never silently kills the shortcut" guarantee.

Tests:

* ``parseVoiceRecordKey`` parametrised over every named token + each
  alias variant.
* New ``isVoiceToggleKey`` cases for space (ch-based match), enter
  (``key.return``), tab, escape, backspace, delete, including
  modifier-mismatch negatives.
* ``formatVoiceRecordKey`` renders named keys in title case
  (``Ctrl+Space``, ``Ctrl+Enter``).
* Existing fall-back-to-Ctrl+B contract preserved for empty input
  AND unrecognised multi-char tokens.

Full TUI suite: 559/559 pass; ``tsc --noEmit`` clean.

Refs #18994 (round-1 review feedback)

Co-authored-by: asheriif <ahmedsherif95@gmail.com>

* test(tui): assert voice.toggle returns configured record_key

Salvage the backend regression from #19339 — asserts ``voice.toggle``
action=on AND action=status responses carry the configured
``voice.record_key`` end-to-end through ``_load_cfg()``. Keeps the
CLI→TUI parity contract visible in the Python test suite alongside
the existing frontend parser/matcher/formatter coverage from #19028.

* fix(tui): address Copilot review on #19835 voice.record_key wiring

Five tightenings on the parser + matcher + hydration surface, all
caught by the Copilot review on the PR — each one turns a silent
false-fire or display/binding skew into a deterministic behaviour.

* **isVoiceToggleKey ctrl branch was too permissive for named keys.**
  The doc-default macOS Cmd+B muscle-memory fallback
  (``isActionMod(key)`` on top of ``key.ctrl``) fired for every
  configured key, so bare Esc — which hermes-ink reports with
  ``key.meta`` on some macOS terminals — triggered ``ctrl+escape``,
  and Alt+Space / Alt+Tab triggered ``ctrl+space`` / ``ctrl+tab``.
  Gate the fallback to the literal ``ctrl+b`` binding so any custom
  chord requires the real Ctrl bit.
* **Alt branch guarded against Ctrl/Cmd co-press.** Without this,
  Ctrl+Alt+<letter> and Cmd+Alt+<letter> also fired ``alt+<letter>``.
* **Dropped the ``meta`` modifier variant and its alias.** In
  hermes-ink ``key.meta`` is Alt on xterm-style terminals and Cmd on
  legacy macOS ones, so a literal ``meta+b`` config displayed as
  ``Cmd+B`` while matching Alt+B — exactly the kind of false
  shortcut the PR was meant to remove. ``cmd`` / ``command`` now
  collapse onto ``super`` (kitty-style ``key.super``, with a macOS
  ``key.meta`` fallback) and render as ``Cmd+B``. Unknown modifier
  tokens fall back to the documented Ctrl+B default rather than
  silently coercing to Ctrl.
* **Slash-command display/binding skew.** ``/voice status`` and
  ``/voice on`` rendered from the fresh gateway ``record_key``
  response, but ``useInputHandlers()`` still bound the old key
  until the next 5s mtime poll. Thread ``setVoiceRecordKey``
  through ``SlashHandlerContext.voice`` and push the parsed spec
  into frontend state on every response so text and binding stay
  consistent.
* **Test coverage for the two paths Copilot flagged.** Added
  vitest coverage for (a) the three-case ``/voice`` slash output
  in ``createSlashHandler.test.ts`` and (b) the
  ``applyDisplay → voice.record_key`` hydration + omit-setter
  back-compat paths in ``useConfigSync.test.ts``. Plus regression
  cases for every false-fire scenario above.

Suite: 575/575 green, tsc --noEmit clean.

* fix(tui): address Copilot round-2 review on #19835

Three tightenings on the surface introduced in the round-1 fix:

* **``/voice tts`` reset custom bindings to Ctrl+B.** The ``tts`` branch
  of ``voice.toggle`` omitted ``record_key`` from its response, so the
  frontend's ``r.record_key ?? 'ctrl+b'`` coerced a user's custom
  binding back to the default on every TTS toggle. Two-sided fix:
  the backend now includes ``record_key`` on the ``tts`` branch (parity
  with ``status``/``on``/``off``), and the slash handler only pushes
  frontend state when the response actually carries ``record_key`` —
  belt-and-suspenders against any future branch forgetting to include
  it.

* **``super+b`` / ``win+b`` / ``cmd+b`` displayed "Cmd+B" on Linux and
  Windows.** ``formatVoiceRecordKey`` rendered ``mod === 'super'`` as
  ``Cmd`` universally, which told non-mac users the wrong modifier to
  press even though ``isVoiceToggleKey`` matched the right event bits.
  Gate the label to ``isMac`` so non-mac renders ``Super+B``.

* **``control+b`` / ``ctrl + b`` lost the macOS Cmd+B fallback.**
  ``_isDefaultVoiceKey`` keyed off ``parsed.raw`` — so
  semantically-equal aliases of the documented default dropped into
  the strict branch even though they bind Ctrl+B. Compare on the
  parsed spec (mod + ch + named) instead.

Coverage added: Linux ``Super+B`` rendering (and macOS ``Cmd+B``),
``control+b`` / ``ctrl + b`` accepting the Cmd+B fallback on darwin,
``/voice tts`` without ``record_key`` not clobbering cached binding,
and a backend regression asserting every ``voice.toggle`` branch
carries the configured key.

Suite: 579/579 TUI vitest green, 2/2 backend voice tests green,
tsc --noEmit clean.

* fix(tui): address Copilot round-3 review on #19835

Three classes of robustness issue caught on the second pass — all
revolve around malformed YAML tipping ``parseVoiceRecordKey`` or
``_voice_record_key`` into a crash instead of the documented
fallback.

* **Parser crashed on non-string YAML scalars.** ``config.get full``
  returns raw ``yaml.safe_load`` output, so ``voice.record_key: 1``
  or ``voice.record_key: true`` in a hand-edited config would hit
  ``.trim()`` on a number/bool and throw, breaking startup and
  every mtime re-apply. Accept ``unknown`` at the signature, guard
  with ``typeof raw !== 'string'``, and fall back to the default.

* **Backend blew up on non-dict ``voice:``.** Same YAML hazard on
  the gateway side: ``voice: true`` / ``voice: cmd+b`` left
  ``_load_cfg().get("voice")`` as a bool/str, so ``.get("record_key")``
  raised AttributeError and took every ``voice.toggle`` branch down
  with it. Centralised the lookup in a single
  ``_voice_record_key()`` helper that ``isinstance``-guards both
  ``voice`` and ``record_key`` and falls back to ``ctrl+b``.

* **Multi-modifier chords silently dropped extras.** The previous
  validator only checked the first modifier token, so ``ctrl+alt+r``
  silently parsed as ``ctrl+r`` and ``cmd+ctrl+b`` as ``super+b`` —
  a typo bound a different shortcut than the user configured.
  Reject multi-modifier spellings outright; the classic CLI only
  supports single-modifier bindings via prompt_toolkit's ``c-x`` /
  ``a-x`` rewrite, so this matches CLI parity.

Coverage added:

* ``parseVoiceRecordKey`` fallback on ``1`` / ``true`` / ``null`` /
  ``undefined`` / ``{}``.
* ``parseVoiceRecordKey`` fallback on ``ctrl+alt+r`` /
  ``cmd+ctrl+b`` / ``alt+ctrl+space``.
* ``test_voice_toggle_handles_non_dict_voice_cfg`` exercises
  every non-dict ``voice:`` shape (bool, str, None, int, list) and
  asserts each falls back to ``record_key: 'ctrl+b'``.

Suite: 581/581 TUI vitest green, 3/3 backend voice tests green,
tsc --noEmit clean.

* fix(tui): address Copilot round-4 review on #19835

Four final corners of the voice.record_key surface:

* **Bare-char configs silently coerced to ``ctrl+<key>``.** A config
  like ``voice.record_key: o`` / ``space`` / ``escape`` fell through
  to the default ``mod = 'ctrl'`` and silently bound Ctrl+O, while
  the classic CLI's prompt_toolkit would bind the raw key (no
  rewrite) — so the two runtimes silently disagreed on what "o"
  means. Require an explicit modifier; bare-char configs fall back
  to the documented Ctrl+B default.

* **Reserved ctrl+<letter> bindings would never fire.**
  ``useInputHandlers()`` intercepts ``ctrl+c`` (interrupt),
  ``ctrl+d`` (quit), and ``ctrl+l`` (clear screen) before the voice
  check runs, so those configs would be advertised in /voice
  status but the advertised shortcut never actually triggers
  push-to-talk. Added ``_RESERVED_CTRL_CHARS`` at parse time so
  the user gets the documented default instead of a dead shortcut.
  (``alt+c``, ``cmd+l``, etc. are not intercepted and stay usable.)

* **``_load_cfg()`` root itself may be a non-dict.**
  ``_voice_record_key()`` isinstance-guarded the ``voice`` subkey
  but not the root — a malformed config.yaml that collapsed to a
  scalar/list at the top level (``config.yaml: true`` or ``[]``)
  would still raise on ``.get("voice")``. Added the top-level
  guard too so every malformed shape falls back to ``ctrl+b``.

* **Stale header comment on ``isVoiceToggleKey``.** The doc-comment
  still claimed "On macOS we additionally accept the platform
  action modifier (Cmd) for the configured letter" even though the
  implementation gates the Cmd fallback to the documented default
  only. Rewrote to match.

Coverage added:

* ``parseVoiceRecordKey`` fallback on bare chars (``o``, ``b``,
  ``space``, ``escape``).
* ``parseVoiceRecordKey`` fallback on ``ctrl+c`` / ``ctrl+d`` /
  ``ctrl+l``; positive case for ``alt+c`` / ``cmd+l`` still usable.
* Backend ``test_voice_toggle_handles_non_dict_voice_cfg`` now
  exercises 5 non-dict shapes at the YAML root too.

Suite: 583/583 TUI vitest green, 3/3 backend voice tests green,
tsc --noEmit clean.

* fix(tui): address Copilot round-5 review on #19835

Three follow-ups on the voice matcher's modifier + shift discipline:

* **``super`` branch falsely fired on Alt+<key> / bare Esc on macOS.**
  ``isVoiceToggleKey`` accepted ``isMac && key.meta`` as a Cmd
  fallback for the ``super`` modifier — but hermes-ink sets
  ``key.meta`` for plain Alt/Option AND for bare Escape on some
  macOS terminals. A ``cmd+b`` config silently fired on Alt+B;
  ``cmd+space`` on Alt+Space; ``cmd+escape`` on bare Esc. Drop the
  fallback and require the literal ``key.super`` bit. Legacy-
  terminal users who need Cmd should upgrade to a kitty-protocol
  terminal or bind ``alt+X`` explicitly.

* **Shift bit was never checked.** The parser rejects multi-
  modifier configs like ``ctrl+shift+tab``, but the runtime
  matcher didn't check ``key.shift`` — so ``ctrl+tab`` also fired
  on Ctrl+Shift+Tab and ``alt+enter`` on Alt+Shift+Enter.
  Early-return on ``key.shift === true`` so the runtime only fires
  the exact chord the user configured.

* **Test leaked ``HERMES_VOICE=1`` into later tests.**
  ``voice.toggle`` action=on writes to ``os.environ`` directly
  (CLI parity, runtime-only flag);
  ``test_voice_toggle_returns_configured_record_key`` dispatched
  action=on without letting monkeypatch take ownership of the var
  first. Any later test
  that read voice mode in the same Python process could inherit a
  stale enabled state. Added ``monkeypatch.setenv("HERMES_VOICE",
  "0")`` up front so monkeypatch restores the original value at
  teardown.

Coverage added:

* ``cmd+b`` / ``cmd+space`` / ``cmd+escape`` do NOT fire on
  ``key.meta``-only events on darwin.
* ``ctrl+tab`` / ``alt+enter`` / ``ctrl+o`` reject matches when
  ``key.shift`` is held; sanity cases without Shift still fire.

Suite: 585/585 TUI vitest green, 3/3 backend voice tests green,
tsc --noEmit clean.

* fix(tui): address Copilot round-6 review on #19835

Two classes of modifier-discipline tightening + one config-surface
honesty fix:

* **Default ``ctrl+b`` Cmd fallback leaked Alt+B.** The default's
  macOS Cmd+B muscle-memory path used ``isActionMod(key)``, which
  returns ``key.meta || key.super`` on darwin. hermes-ink also
  reports plain Alt as ``key.meta``, so Alt+B silently fired the
  default binding. Replaced with strict ``isMac && key.super ===
  true`` — kitty-style Cmd+B still works, Alt+B correctly
  rejected. Legacy-terminal mac users (Terminal.app without
  CSI-u) now get raw Ctrl+B only; the documented default still
  works everywhere.

* **ctrl / super branches accepted extra modifier bits.** The
  parser rejects multi-modifier configs like ``ctrl+alt+o``, but
  the runtime matcher was permissive — ``ctrl+o`` fired on
  Ctrl+Alt+O / Ctrl+Cmd+O, and ``super+b`` fired on Cmd+Alt+B /
  Ctrl+Cmd+B. Added strict ``!key.alt && !key.meta && key.super
  !== true`` on ctrl, and ``!key.ctrl && !key.alt && !key.meta``
  on super, so the runtime only fires the exact chord the parser
  would let you configure.

* **Dropped ``cmd`` / ``command`` aliases.** They parsed to
  ``super`` and rendered as ``Cmd+X``, but legacy macOS terminals
  report Cmd as ``key.meta`` (same signal as Alt), so a
  ``cmd+o`` config was advertised as working but never actually
  fired on Terminal.app-without-CSI-u. That recreated the
  "displayed shortcut does not work" problem this PR was meant to
  remove. Users who want the platform action modifier spell it
  ``super`` / ``win`` — that matches the unambiguous ``key.super``
  bit, and kitty-style macOS terminals render it as ``Cmd+X`` via
  platform-aware formatter.

Coverage updated:

* Default ctrl+b no longer fires on Alt+B via ``key.meta`` leak;
  raw Ctrl+B and kitty-style Cmd+B still fire.
* ``ctrl+o`` rejects Ctrl+Alt+O / Ctrl+Cmd+O / Ctrl+Meta+O chords.
* ``super+b`` rejects Cmd+Alt+B / Cmd+Meta+B / Ctrl+Cmd+B chords.
* ``cmd+b`` / ``command+b`` / ``meta+b`` all fall back to the
  documented default at parse time (joined the ambiguous-mac-mod
  rejection class).
* Round-2 expectations that asserted ``cmd+b`` parsed as super
  and accepted ``key.meta`` on darwin updated to reflect the new
  stricter contract.

Suite: 588/588 TUI vitest green, 3/3 backend voice tests green,
tsc --noEmit clean.

* fix(tui): address Copilot follow-up on wire typing + escape precedence

Two follow-ups from the latest Copilot pass:

* **Config wire typing honesty (`gatewayTypes.ts`)**
  `config.get full` forwards raw `yaml.safe_load()` output, so
  `voice.record_key` can be any scalar/container when hand-edited.
  Typing it as `string` suggests a normalized contract that the
  backend does not guarantee and makes unsafe callers more likely.
  Change `ConfigVoiceConfig.record_key` to `unknown` with an
  explicit comment that callers must normalize at runtime.

* **Escape-based voice bindings were swallowed before voice check**
  `useInputHandlers()` handled `key.escape` for queue-edit cancel and
  selection clear before `isVoiceToggleKey(...)`, so configured
  `ctrl+escape` / `alt+escape` / `super+escape` chords were advertised
  but never toggled recording in those UI states.
  Add an early escape+voice check before generic Esc handlers so
  escape-based voice bindings win when configured, while plain Esc
  behavior remains unchanged.

Also updated PR #19835 description text to remove stale cmd/command
alias claims and match the current parser contract.

* fix(tui): pass configured voice shortcut through TextInput layer

Thread the live parsed voiceRecordKey into TextInput so configured voice.record_key chords bubble to useInputHandlers instead of being consumed as editor input. This removes the last hardcoded Ctrl+B pass-through in the composer path while preserving existing global control chord behavior.

* fix(tui): require explicit alt bit for escape-based alt chords

Hermes-ink reports bare Escape as meta=true+escape=true on some terminals, so a configured alt+escape binding was firing on bare Esc. Require an explicit key.alt bit when the configured named key is escape so plain Esc stays plain Esc; kitty-style alt+escape still fires.

* fix(tui): harden voice.record + TextInput paste + super-mod reserved list

Three round-7 Copilot follow-ups on #19835:

- voice.record start handler used _load_cfg().get('voice', {}).get(...) without
  shape checks, so malformed YAML (bool/scalar/list) produced error 5025 instead of
  using VAD defaults. Centralized _voice_cfg_dict() helper and type-guarded
  silence_threshold/silence_duration with numeric fallbacks.
- TextInput pass-through check moved above paste/copy handling so configured
  voice chords (ctrl+v / alt+v / cmd+v) beat the composer's paste/copy
  defaults.
- parser now also rejects super+{c,d,l,v} — on macOS those are
  copy/exit/clear/paste and would be advertised in /voice status but never
  actually toggle recording.

* Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

* fix(tui): round-8 Copilot review — allow ctrl+x, gate super reservations to macOS, preserve voice key on transient RPC failure

Three round-8 Copilot follow-ups on #19835:

- Revert ctrl+x addition to _RESERVED_CTRL_CHARS (landed via Copilot Autofix
  commit 731ec86): ctrl+x is only claimed during queue-edit
  (queueEditIdx !== null), so voice works the rest of the session and
  matches CLI ctrl+<letter> parity.
- Gate super+{c,d,l,v} reservation to isMac. Linux/Windows TUI globals key
  off Ctrl, so kitty/CSI-u super+<letter> configs don't collide on non-mac
  and should stay usable.
- applyDisplay() now skips setVoiceRecordKey when cfg is null so one
  transient quietRpc() failure after a config edit doesn't clobber the
  cached binding back to Ctrl+B until the next successful poll.

New coverage:
- parseVoiceRecordKey preserves ctrl+x on linux
- super+{c,d,l,v} rejected on darwin, allowed on linux
- applyDisplay(null, ...) leaves voiceRecordKey untouched

* fix(cli,tui): normalize voice.record_key aliases across CLI + TUI for parity

Round-9 Copilot review on #19835: TUI accepted control+/option+/opt+/super+/win+ aliases but the classic CLI only rewrote literal ctrl+/alt+ before handing to prompt_toolkit, so a TUI-valid config silently bound a different (or no) shortcut in the CLI.

- Added normalize_voice_record_key_for_prompt_toolkit() in hermes_cli/voice.py with a single alias table (ctrl/control/alt/option/opt → c-/a-).
- Wired it into all three cli.py sites (_enable_voice_mode hint, _show_voice_status display, and the prompt_toolkit binding in _register_voice_handler).
- /voice status display now renders control+x as Ctrl+X and option+x as Alt+X (canonical casing) to match TUI formatVoiceRecordKey.
- super/win/windows are intentionally left unchanged: prompt_toolkit has no super modifier, so the CLI will reject them loudly at startup rather than silently binding Ctrl+B. Documented this split at both the TUI _MOD_ALIASES comment and the CLI normalizer docstring.
- Added tests covering ctrl/control/alt/option/opt mapping, case-insensitivity, non-string fallback, empty-string fallback, and super/win pass-through.

* fix(cli): port TUI parser contract into CLI voice.record_key normalizer

Round-10 Copilot review on #19835.

hermes_cli/voice.py's normalize_voice_record_key_for_prompt_toolkit() previously did blind substring replacement with no trim/validate step, so the CLI diverged from the TUI parser on:
- whitespace ('ctrl + b' -> 'c- b' instead of 'c-b')
- typoed named keys ('ctrl+spcae' passed through as 'c-spcae' and prompt_toolkit would reject at startup)
- bare-char configs ('o' should fall back, not pass through as 'o')
- multi-modifier chords ('ctrl+alt+r')
- reserved ctrl chars ('ctrl+c/d/l')
- unknown modifiers ('meta+b' / 'shift+b')
- named-key aliases ('return'/'esc'/'bs'/'del' not collapsed to prompt_toolkit canonicals)

Port the TUI parser contract into Python (_VOICE_MOD_ALIASES, _VOICE_NAMED_KEYS, _VOICE_RESERVED_CTRL_CHARS) so one config value binds the same shortcut in both runtimes.

Also added format_voice_record_key_for_status() shared between the PTT hint and /voice status display. Non-string scalars (voice.record_key: true / 1) now surface as 'Ctrl+B' instead of the raw scalar — /voice status no longer advertises a shortcut that can never bind.

Tests: 29/29 in test_voice_wrapper.py, including 11 new regressions covering whitespace, named-key aliases, typos, bare-char, multi-modifier, reserved ctrl, unknown mods, non-string fallback, and formatter contract.

* fix(cli): shape-safe voice config read + graceful super/win fallback

Round-11 Copilot review on #19835.

Two remaining cross-runtime gaps:

1. load_config().get('voice', {}) still assumed voice was a dict, so a hand-edited voice: true / voice: cmd+b at the top level raised AttributeError before the voice UI could start. Added voice_record_key_from_config(cfg) to hermes_cli/voice.py that isinstance-guards both the root and the voice subkey. All three cli.py read sites (_enable_voice_mode hint, _show_voice_status, PTT binding) now use it.

2. The CLI normalizer previously passed super+/win+/windows+ through unrewritten so prompt_toolkit would reject them loudly at startup — but that crash was a worse UX than a silent fallback. Normalizer now returns c-b for those spellings, and the PTT binding site logs a warning so users see why their TUI-only shortcut isn't binding in the CLI.

Coverage: 34/34 in tests/hermes_cli/test_voice_wrapper.py (5 new cases for voice_record_key_from_config + malformed-root + malformed-voice + extractor/normalizer composition).

* fix(cli): self-audit cleanup — remaining voice-config shape safety + doc drift

Self-review of the voice.record_key change set turned up four remaining items Copilot would very likely flag next round:

1. cli.py _voice_start_continuous still read load_config().get('voice', {}).get('silence_threshold') without an isinstance guard, so a hand-edited voice: true / voice: cmd+b (non-dict) raised AttributeError on VAD recording start. Shape-safe coerce the voice dict and numeric-guard silence_threshold/silence_duration.

2. cli.py _enable_voice_mode's auto_tts check had the same bug — fixed with the same isinstance guard.

3. hermes_cli/voice.py module comment on _VOICE_MOD_ALIASES still said super/win/windows 'pass through unchanged and prompt_toolkit's add() call loudly rejects them at startup'. Round 11 changed the normalizer to silently fall back to c-b with a warning at the binding site; updated the comment to match.

4. ui-tui/src/lib/platform.ts header comment had the same stale 'CLI will loudly reject them at startup' claim; updated to 'falls back to the documented default and logs a warning'.

No behavior change on the code paths already covered by test_voice_wrapper.py; the two cli.py fixes are defensive against malformed YAML that previous rounds already hardened in tui_gateway/server.py but missed in the classic CLI.

* fix(cli,tui): round-12 Copilot review — alt-collide on mac, bool-in-int guards, voice UI hardcodes, mtime-reload test

Five round-12 Copilot review items on #19835:

1. platform.ts: hermes-ink reports Alt as key.meta on many terminals; isActionMod on darwin accepts key.meta as the action modifier. So alt+c/d/l get claimed by isCopyShortcut / isAction('d') / isAction('l') before the voice check. Reject those configs at parse time on macOS only (non-mac keeps them usable).

2. cli.py: four remaining hardcoded 'Ctrl+B' sites in voice-facing UI (_get_voice_status_fragments status bar, _voice_start_recording hints, _get_placeholder composer text) were still lying about non-default configs. Added self._voice_record_key_label() shared helper and wired it into all three sites.

3. server.py + cli.py: bool is a subclass of int, so isinstance(silence_threshold, (int, float)) accepted True/False from malformed YAML and forwarded 1/0 to the VAD engine. Exclude bool explicitly so boolean typos fall back to the documented 200 / 3.0 defaults.

4. useConfigSync.ts: extracted the config.get-full fetch+apply body into a shared hydrateFullConfig() helper. Both the initial hydration and mtime-reload paths now use it, so the polling/RPC wiring is exercised by direct unit tests (4 new cases: fresh apply, reapply on new value, transient RPC failure preserves cache, back-compat without voice setter).

5. Added alt+{c,d,l} rejection regressions on darwin + allow on linux, and bool-leak regressions for both silence_threshold and silence_duration in tests/test_tui_gateway_server.py.

Suite: 602/602 TUI vitest, 38/38 backend voice tests, typecheck + lints clean.

* fix(cli): cache voice record-key label at binding time + status-bar coverage

Round-13 Copilot review on #19835.

_voice_record_key_label() was reading live config on every render, which caused two problems:

1. prompt_toolkit registers the push-to-talk binding once at session start (@kb.add(_voice_key)); the binding does NOT re-read config. Editing voice.record_key mid-session would switch the status-bar / placeholder / recording-hint label to the new shortcut while the actual keybinding stayed on the startup chord — reintroducing the display/binding drift this whole PR is fighting.

2. Hot render path: during recording the UI is invalidated every 150ms, so re-loading + deep-merging config on every call added avoidable UI overhead.

Fix: cache the label at the same site that registers the prompt_toolkit binding via new set_voice_record_key_cache(raw_key). _voice_record_key_label() now just returns the cached value (falls back to 'Ctrl+B' before startup). Status/placeholder/hint are always in sync with the live binding; no config reload per render.

Also added 4 regression cases to tests/cli/test_cli_status_bar.py: configured ctrl+<letter> renders in both wide and compact status bars, configured named key (ctrl+space) renders in the recording hint, pre-startup absent cache falls back to Ctrl+B, and malformed configs (bool True) fall through the formatter to Ctrl+B.

Suite: 60/60 test_cli_status_bar + test_voice_wrapper, typecheck + lints clean.

* fix(cli): route /voice on + /voice status through startup-pinned label; mac alt+cdl parity

Round-14 Copilot review on #19835. All three comments legit:

1. _enable_voice_mode still formatted label from live load_config() — mid-session config edit would make /voice on announce the new shortcut while the prompt_toolkit binding stayed the startup chord. Use self._voice_record_key_label() (cached at binding time, round-13) so /voice on cannot drift from the live binding.

2. _show_voice_status had the same bug — /voice status reported live config instead of the pinned startup binding. Fixed the same way.

3. CLI normalizer accepted alt+c/alt+d/alt+l even though the TUI parser rejects them on macOS (Copilot round-12 — hermes-ink reports Alt as key.meta, isActionMod on darwin accepts it, collides with isCopyShortcut / isAction). Added _VOICE_RESERVED_ALT_CHARS_MAC = {c,d,l} gated to sys.platform == 'darwin' so a shared config like option+c falls back to c-b on both runtimes on macOS; non-mac still binds a-c.

Coverage: 4 new tests in test_voice_wrapper.py covering mac alt+cdl rejection, linux alt+cdl allowed, option/opt alias forms, and mac-specific exclusions for other alt letters. 62/62 in voice wrapper + status bar suites.

---------

Co-authored-by: Tranquil-Flow <tranquil_flow@protonmail.com>
Co-authored-by: asheriif <ahmedsherif95@gmail.com>
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
											
										
										
											2026-05-04 15:49:28 -07:00
+								# Modifier aliases mirrored from the TUI parser (``ui-tui/src/lib/platform.ts``)
 								# ``_MOD_ALIASES`` table — the contract that removes the cross-runtime
 								# mismatch Copilot flagged in round-9 on #19835.
 								#
 								# ``super``/``win``/``windows`` are intentionally absent: prompt_toolkit
 								# has no super/meta modifier for the Cmd key, so those spellings are
 								# TUI-only. The normalizer below returns the documented default
 								# (``c-b``) for them — a silent fallback was preferred to a hard
 								# startup crash (Copilot round-11). The CLI binding site
 								# (``_register_voice_handler`` in cli.py) logs a warning when that
 								# fallback fires so users see why their TUI-only shortcut isn't
 								# bound in the classic CLI.
 								_VOICE_MOD_ALIASES = {
 								    **dict.fromkeys(("ctrl", "control"), "c-"),
 								    **dict.fromkeys(("alt", "option", "opt"), "a-"),
 								}
 								# Named keys accepted after the ``c-`` / ``a-`` prefix, grouped by the
 								# canonical spelling that is emitted. Every alias in a group collapses
 								# to that spelling so one config value binds identically in the CLI
 								# and the TUI (Copilot round-10 on #19835).
 								_VOICE_NAMED_KEYS = {
 								    **dict.fromkeys(("space", "spc"), "space"),
 								    **dict.fromkeys(("enter", "return", "ret"), "enter"),
 								    "tab": "tab",
 								    **dict.fromkeys(("escape", "esc"), "escape"),
 								    **dict.fromkeys(("backspace", "bs"), "backspace"),
 								    **dict.fromkeys(("delete", "del"), "delete"),
 								}
 								# Letters that may not follow Ctrl. The TUI's ``useInputHandlers()``
 								# consumes ``ctrl+c`` (interrupt), ``ctrl+d`` (quit) and ``ctrl+l``
 								# (clear screen) before its voice check runs, so such a binding would
 								# show up in /voice status yet never trigger push-to-talk. Same
 								# blocklist as the TUI parser.
 								_VOICE_RESERVED_CTRL_CHARS = frozenset("cdl")
 								# Letters that may not follow Alt on macOS only. The TUI parser
 								# rejects ``alt+c`` / ``alt+d`` / ``alt+l`` on darwin because
 								# hermes-ink reports Alt as ``key.meta`` on many terminals and the
 								# darwin action-modifier check accepts ``key.meta``, so those chords
 								# are claimed by the copy / exit / clear handlers first. Mirroring the
 								# reservation keeps ``option+c`` etc. from binding Alt+C in the CLI
 								# while the TUI silently falls back to Ctrl+B (Copilot round-14 on
 								# #19835).
 								_VOICE_RESERVED_ALT_CHARS_MAC = frozenset("cdl")
 								# Documented default binding (Ctrl+B) in prompt_toolkit spelling;
 								# returned whenever the configured value cannot be honoured.
 								_DEFAULT_PT_KEY = "c-b"
 								def voice_record_key_from_config(cfg: Any) -> Any:
 								    """Fetch ``cfg["voice"]["record_key"]`` without assuming either level is a dict.
 								    A hand-edited ``voice: true`` or ``voice: cmd+b`` survives
 								    ``load_config()``'s deep merge as a scalar, so chaining
 								    ``.get("voice", {}).get("record_key")`` would raise AttributeError
 								    before voice could start (Copilot round-11 on #19835).
 								    Returns the stored ``record_key`` value unchanged (whatever its
 								    type) when both ``cfg`` and ``cfg["voice"]`` are dicts; otherwise
 								    ``None``, which the normalizer/formatter turn into the documented
 								    default.
 								    """
 								    voice_section = cfg.get("voice") if isinstance(cfg, dict) else None
 								    return (
 								        voice_section.get("record_key")
 								        if isinstance(voice_section, dict)
 								        else None
 								    )
 								def normalize_voice_record_key_for_prompt_toolkit(raw: Any) -> str:
 								    """Translate ``voice.record_key`` into a ``c-<key>`` / ``a-<key>`` binding string.
 								    Follows the TUI parser contract (``ui-tui/src/lib/platform.ts``) so
 								    one config value resolves to the same shortcut in both runtimes.
 								    The only accepted shape is ``<modifier>+<key>`` (case-insensitive;
 								    surrounding whitespace and empty ``+`` segments are dropped):
 								    * modifier — any spelling listed in ``_VOICE_MOD_ALIASES``
 								    * key — a single character (``ctrl+o`` → ``c-o``) or a name/alias
 								      from ``_VOICE_NAMED_KEYS`` (``ctrl+return`` → ``c-enter``)
 								    Everything else returns the documented default ``c-b``:
 								    non-strings, empty values, a bare key without a modifier,
 								    multi-modifier chords, unknown modifiers, the TUI-only
 								    ``super`` / ``win`` / ``windows`` modifiers, unrecognised
 								    multi-character key names, reserved ``ctrl+c|d|l`` and, on macOS
 								    only, ``alt+c|d|l``.
 								    """
 								    if not isinstance(raw, str):
 								        return _DEFAULT_PT_KEY
 								    stripped_pieces = (piece.strip() for piece in raw.strip().lower().split("+"))
 								    tokens = [piece for piece in stripped_pieces if piece]
 								    # Exactly one modifier plus one key is required. Zero tokens is an
 								    # empty value, one token is a bare key (which the TUI parser
 								    # refuses), and three or more is a multi-modifier chord that
 								    # prompt_toolkit and hermes-ink would not treat alike.
 								    if len(tokens) != 2:
 								        return _DEFAULT_PT_KEY
 								    mod_name, key_name = tokens
 								    # ``super`` / ``win`` / ``windows`` exist only in the TUI;
 								    # prompt_toolkit has no super modifier. Fall back quietly and leave
 								    # the warning to the CLI binding site (Copilot round-11 on #19835).
 								    if mod_name in ("super", "win", "windows"):
 								        prefix = None
 								    else:
 								        prefix = _VOICE_MOD_ALIASES.get(mod_name)
 								    if not prefix:
 								        return _DEFAULT_PT_KEY
 								    # Multi-character keys must be a recognised name; a typo such as
 								    # ``ctrl+spcae`` falls back rather than leaking through as
 								    # ``c-spcae``.
 								    if len(key_name) > 1:
 								        canonical = _VOICE_NAMED_KEYS.get(key_name)
 								        return f"{prefix}{canonical}" if canonical else _DEFAULT_PT_KEY
 								    # Single character: honour the same reservations as the TUI parser,
 								    # including the Alt reservation that applies on macOS only.
 								    ctrl_reserved = prefix == "c-" and key_name in _VOICE_RESERVED_CTRL_CHARS
 								    alt_reserved_on_mac = (
 								        prefix == "a-"
 								        and sys.platform == "darwin"
 								        and key_name in _VOICE_RESERVED_ALT_CHARS_MAC
 								    )
 								    if ctrl_reserved or alt_reserved_on_mac:
 								        return _DEFAULT_PT_KEY
 								    return f"{prefix}{key_name}"
 								def format_voice_record_key_for_status(raw: Any) -> str:
 								    """Render ``voice.record_key`` as a human-readable label such as ``Ctrl+B``.
 								    Mirrors the TUI's ``formatVoiceRecordKey``. The raw value is first
 								    passed through ``normalize_voice_record_key_for_prompt_toolkit``,
 								    so the label always describes a binding the CLI accepts:
 								    malformed, reserved and TUI-only (``super`` / ``win``) configs all
 								    come back as the documented default ``Ctrl+B`` and status never
 								    advertises a shortcut that won't bind (Copilot round-10 on #19835).
 								    Args:
 								        raw: The configured ``voice.record_key`` value; any type.
 								    Returns:
 								        ``Ctrl+<Key>`` or ``Alt+<Key>`` with the key's first character
 								        upper-cased (``c-o`` → ``Ctrl+O``, ``a-space`` → ``Alt+Space``).
 								    """
 								    normalized = normalize_voice_record_key_for_prompt_toolkit(raw)
 								    # The normalizer only ever returns ``c-<key>`` or ``a-<key>``, so a
 								    # two-character prefix lookup covers every real result. The former
 								    # ``super+<key>`` title-casing branch could never run once the
 								    # normalizer started mapping those spellings to ``c-b``.
 								    label = {"c-": "Ctrl+", "a-": "Alt+"}.get(normalized[:2])
 								    key = normalized[2:]
 								    if label is None or not key:
 								        # Defensive only: not reachable with the current normalizer.
 								        return "Ctrl+B"
 								    # Upper-casing the first character covers both ``o`` → ``O`` and
 								    # ``space`` → ``Space``.
 								    return label + key[0].upper() + key[1:]
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
+								from tools.voice_mode import (
 								    create_audio_recorder,
 								    is_whisper_hallucination,
 								    play_audio_file,
 								    transcribe_recording,
 								)
 								logger = logging.getLogger(__name__)
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
 								def _debug(msg: str) -> None:
 								    """Emit a debug breadcrumb when HERMES_VOICE_DEBUG=1.
 								    Goes to stderr so the TUI gateway wraps it as a gateway.stderr event,
 								    which createGatewayEventHandler shows as an Activity line — exactly
 								    what we need to diagnose "why didn't the loop auto-restart?" in the
 								    user's real terminal without shipping a separate debug RPC.
-												fix(tui): ignore SIGPIPE so stderr back-pressure can't kill the gateway

Crash-log stack trace (tui_gateway_crash.log) from the user's session
pinned the regression: SIGPIPE arrived while main thread was blocked on
for-raw-in-sys.stdin — i.e., a background thread (debug print to stderr,
most likely from HERMES_VOICE_DEBUG=1) wrote to a pipe whose buffer the
TUI hadn't drained yet, and SIG_DFL promptly killed the process.

Two fixes that together restore CLI parity:

- entry.py: SIGPIPE → SIG_IGN instead of the _log_signal handler that
  then exited. With SIG_IGN, Python raises BrokenPipeError on the
  offending write, which write_json already handles with a clean exit
  via _log_exit. SIGTERM / SIGHUP still route through _log_signal so
  real termination signals remain diagnosable.

- hermes_cli/voice.py:_debug: wrap the stderr print in a BrokenPipeError
  / OSError try/except. This runs from daemon threads (silence callback,
  TTS playback, beep), so a broken stderr must not escape and ride up
  into the main event loop.

Verified by spawning the gateway subprocess locally:
  voice.toggle status → 200 OK, process stays alive, clean exit on
  stdin close logs "reason=stdin EOF" instead of a silent reap.

											
										
										
											2026-04-24 01:54:20 +03:00
 								    Any OSError / BrokenPipeError is swallowed because this fires from
 								    background threads (silence callback, TTS daemon, beep) where a
 								    broken stderr pipe must not kill the whole gateway — the main
 								    command pipe (stdin+stdout) is what actually matters.
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								    """
-												fix(tui): ignore SIGPIPE so stderr back-pressure can't kill the gateway

Crash-log stack trace (tui_gateway_crash.log) from the user's session
pinned the regression: SIGPIPE arrived while main thread was blocked on
for-raw-in-sys.stdin — i.e., a background thread (debug print to stderr,
most likely from HERMES_VOICE_DEBUG=1) wrote to a pipe whose buffer the
TUI hadn't drained yet, and SIG_DFL promptly killed the process.

Two fixes that together restore CLI parity:

- entry.py: SIGPIPE → SIG_IGN instead of the _log_signal handler that
  then exited. With SIG_IGN, Python raises BrokenPipeError on the
  offending write, which write_json already handles with a clean exit
  via _log_exit. SIGTERM / SIGHUP still route through _log_signal so
  real termination signals remain diagnosable.

- hermes_cli/voice.py:_debug: wrap the stderr print in a BrokenPipeError
  / OSError try/except. This runs from daemon threads (silence callback,
  TTS playback, beep), so a broken stderr must not escape and ride up
  into the main event loop.

Verified by spawning the gateway subprocess locally:
  voice.toggle status → 200 OK, process stays alive, clean exit on
  stdin close logs "reason=stdin EOF" instead of a silent reap.

											
										
										
											2026-04-24 01:54:20 +03:00
+								    if os.environ.get("HERMES_VOICE_DEBUG", "").strip() != "1":
 								        return
 								    try:
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								        print(f"[voice] {msg}", file=sys.stderr, flush=True)
-												fix(tui): ignore SIGPIPE so stderr back-pressure can't kill the gateway

Crash-log stack trace (tui_gateway_crash.log) from the user's session
pinned the regression: SIGPIPE arrived while main thread was blocked on
for-raw-in-sys.stdin — i.e., a background thread (debug print to stderr,
most likely from HERMES_VOICE_DEBUG=1) wrote to a pipe whose buffer the
TUI hadn't drained yet, and SIG_DFL promptly killed the process.

Two fixes that together restore CLI parity:

- entry.py: SIGPIPE → SIG_IGN instead of the _log_signal handler that
  then exited. With SIG_IGN, Python raises BrokenPipeError on the
  offending write, which write_json already handles with a clean exit
  via _log_exit. SIGTERM / SIGHUP still route through _log_signal so
  real termination signals remain diagnosable.

- hermes_cli/voice.py:_debug: wrap the stderr print in a BrokenPipeError
  / OSError try/except. This runs from daemon threads (silence callback,
  TTS playback, beep), so a broken stderr must not escape and ride up
  into the main event loop.

Verified by spawning the gateway subprocess locally:
  voice.toggle status → 200 OK, process stays alive, clean exit on
  stdin close logs "reason=stdin EOF" instead of a silent reap.

											
										
										
											2026-04-24 01:54:20 +03:00
+								    except (BrokenPipeError, OSError):
 								        pass
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
 								def _beeps_enabled() -> bool:
 								    """CLI parity: voice.beep_enabled in config.yaml (default True)."""
 								    try:
 								        from hermes_cli.config import load_config
 								        voice_cfg = load_config().get("voice", {})
 								        if isinstance(voice_cfg, dict):
 								            return bool(voice_cfg.get("beep_enabled", True))
 								    except Exception:
 								        pass
 								    return True
 								def _play_beep(frequency: int, count: int = 1) -> None:
 								    """Play an audible cue matching cli.py's record/stop beeps.

 								    A single beep marks recording start (cli.py:_voice_start_recording)
 								    and a double beep marks stop (cli.py:_voice_stop_and_transcribe);
 								    the tone in Hz is supplied by the caller via *frequency* and the
 								    number of beeps via *count*.

 								    Does nothing when ``_beeps_enabled()`` is false. Best-effort: any
 								    exception from importing or calling ``play_beep`` is reported via
 								    ``_debug`` and otherwise swallowed, so the voice loop never breaks
 								    because a speaker was unavailable.
 								    """
 								    if _beeps_enabled():
 								        try:
 								            # Imported lazily so a missing audio stack only fails here.
 								            from tools.voice_mode import play_beep as emit_beep
 								            emit_beep(frequency=frequency, count=count)
 								        except Exception as e:
 								            _debug(f"beep {frequency}Hz failed: {e}")
 								# ── Push-to-talk state ───────────────────────────────────────────────
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
+								_recorder = None
 								# Serialises access to ``_recorder``: start_recording and
 								# stop_and_transcribe both hold it while reading or rebinding the global.
 								_recorder_lock = threading.Lock()
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								# ── Continuous (VAD) state ───────────────────────────────────────────
 								# NOTE(review): presumably guards the continuous-mode globals in this
 								# section — confirm against start_continuous / stop_continuous.
 								_continuous_lock = threading.Lock()
 								# Starts False; presumably True only while the VAD loop runs — TODO confirm.
 								_continuous_active = False
 								# Typed ``Any`` and initially unset; presumably holds the loop's recorder.
 								_continuous_recorder: Any = None
-												fix(tui): break TTS→STT feedback loop + colorize REC badge

TTS feedback loop (hermes_cli/voice.py)

The VAD loop kept the microphone live while speak_text played the
agent's reply over the speakers, so the reply itself was picked up,
transcribed, and submitted — the agent then replied to its own echo
("Ha, looks like we're in a loop").

Ported cli.py:_voice_tts_done synchronisation:

- _tts_playing: threading.Event (initially set = "not playing").
- speak_text cancels the active recorder before opening the speakers,
  clears _tts_playing, and on exit waits 300 ms before re-starting the
  recorder — long enough for the OS audio device to settle so afplay
  and sounddevice don't race for it.
- _continuous_on_silence now waits on _tts_playing (up to 60 s) before
  re-arming the mic with another 300 ms gap, mirroring
  cli.py:10619-10621.  If the user flips voice off during the wait the
  loop exits cleanly instead of fighting for the device.

Without both halves the loop races: if the silence callback fires
before TTS starts it re-arms immediately; if TTS is already playing
the pause-and-resume path catches it.

Red REC badge (ui-tui appChrome + useMainApp)

Classic CLI (cli.py:_get_voice_status_fragments) renders "● REC" in
red and "◉ STT" in amber.  TUI was showing a dim "REC" with no dot,
making it hard to spot at a glance.  voiceLabel now emits the same
glyphs and appChrome colours them via t.color.error / t.color.warn,
falling back to dim for the idle label.

											
										
										
											2026-04-24 01:33:10 +03:00
 								# ── TTS-vs-STT feedback guard ────────────────────────────────────────
 								# When TTS plays the agent reply over the speakers, the live microphone
 								# picks it up and transcribes the agent's own voice as user input — an
 								# infinite loop the agent happily joins ("Ha, looks like we're in a loop").
 								# This Event mirrors cli.py:_voice_tts_done: cleared while speak_text is
 								# playing, set while silent. _continuous_on_silence waits on it before
 								# re-arming the recorder, and speak_text itself cancels any live capture
 								# before starting playback so the tail of the previous utterance doesn't
 								# leak into the mic.
 								_tts_playing = threading.Event()
 								_tts_playing.set()  # initially "not playing"
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								_continuous_on_transcript: Optional[Callable[[str], None]] = None
 								# Optional callback slots for continuous mode; None until registered.
 								# NOTE(review): presumably assigned by start_continuous — confirm.
 								_continuous_on_status: Optional[Callable[[str], None]] = None
 								_continuous_on_silent_limit: Optional[Callable[[], None]] = None
 								# No-speech cycle tally, starting at zero.
 								# NOTE(review): presumably reset once speech is transcribed — confirm.
 								_continuous_no_speech_count = 0
 								# Per the module docstring, three consecutive no-speech cycles stop the
 								# loop and fire ``on_silent_limit`` so the UI can turn the mode off.
 								_CONTINUOUS_NO_SPEECH_LIMIT = 3
 								# ── Push-to-talk API ─────────────────────────────────────────────────
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
 								def start_recording() -> None:
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								    """Begin capturing from the default input device (push-to-talk).
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								    Idempotent — calling again while a recording is in progress is a no-op.
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
+								    """
 								    global _recorder
 								    with _recorder_lock:
 								        if _recorder is not None and getattr(_recorder, "is_recording", False):
 								            return
 								        rec = create_audio_recorder()
 								        rec.start()
 								        _recorder = rec
 								def stop_and_transcribe() -> Optional[str]:
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								    """Stop the active push-to-talk recording, transcribe, return text.
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
 								    Returns ``None`` when no recording is active, when the microphone
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								    captured no speech, or when Whisper returned a known hallucination.
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
+								    """
 								    global _recorder
 								    with _recorder_lock:
 								        rec = _recorder
 								        _recorder = None
 								    if rec is None:
 								        return None
 								    wav_path = rec.stop()
 								    if not wav_path:
 								        return None
 								    try:
 								        result = transcribe_recording(wav_path)
 								    except Exception as e:
 								        logger.warning("voice transcription failed: %s", e)
 								        return None
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								    finally:
 								        try:
 								            if os.path.isfile(wav_path):
 								                os.unlink(wav_path)
 								        except Exception:
 								            pass
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								    # transcribe_recording returns {"success": bool, "transcript": str, ...}
 								    # — matches cli.py:_voice_stop_and_transcribe's result.get("transcript").
 								    if not result.get("success"):
 								        return None
 								    text = (result.get("transcript") or "").strip()
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
+								    if not text or is_whisper_hallucination(text):
 								        return None
 								    return text
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								# ── Continuous (VAD) API ─────────────────────────────────────────────
 								def start_continuous(
 								    on_transcript: Callable[[str], None],
 								    on_status: Optional[Callable[[str], None]] = None,
 								    on_silent_limit: Optional[Callable[[], None]] = None,
 								    silence_threshold: int = 200,
 								    silence_duration: float = 3.0,
 								) -> None:
 								    """Start a VAD-driven continuous recording loop.

 								    The loop calls ``on_transcript(text)`` each time speech is detected and
 								    transcribed successfully, then auto-restarts. After
 								    ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech
 								    picked up at all) the loop stops itself and calls ``on_silent_limit``
 								    so the UI can reflect "voice off". Idempotent — calling while already
 								    active is a no-op.

 								    ``on_status`` is called with ``"listening"`` / ``"transcribing"`` /
 								    ``"idle"`` so the UI can show a live indicator.

 								    Args:
 								        on_transcript: Stored as the loop's transcript callback.
 								        on_status: Optional status callback; invoked here with
 								            ``"listening"`` once the recorder has started.
 								        on_silent_limit: Optional callback stored for the silent-cycle
 								            halt path.
 								        silence_threshold: Written to the recorder's
 								            ``_silence_threshold`` (presumably an RMS level — confirm
 								            against ``AudioRecorder``).
 								        silence_duration: Written to the recorder's
 								            ``_silence_duration``, in seconds.

 								    Raises:
 								        Exception: Whatever ``rec.start`` raised, re-raised after the
 								            loop state has been rolled back.
 								    """
 								    global _continuous_active, _continuous_recorder
 								    global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
 								    global _continuous_no_speech_count
 								    with _continuous_lock:
 								        if _continuous_active:
 								            _debug("start_continuous: already active — no-op")
 								            return
 								        _continuous_active = True
 								        _continuous_on_transcript = on_transcript
 								        _continuous_on_status = on_status
 								        _continuous_on_silent_limit = on_silent_limit
 								        _continuous_no_speech_count = 0
 								        # The recorder is created once and reused across loops.
 								        if _continuous_recorder is None:
 								            _continuous_recorder = create_audio_recorder()
 								        _continuous_recorder._silence_threshold = silence_threshold
 								        _continuous_recorder._silence_duration = silence_duration
 								        rec = _continuous_recorder
 								    _debug(
 								        f"start_continuous: begin (threshold={silence_threshold}, duration={silence_duration}s)"
 								    )
 								    # CLI parity: single 880 Hz beep *before* opening the stream — placing
 								    # the beep after stream.start() on macOS triggers a CoreAudio conflict
 								    # (cli.py:7528 comment).
 								    _play_beep(frequency=880, count=1)
 								    try:
 								        rec.start(on_silence_stop=_continuous_on_silence)
 								    except Exception as e:
 								        logger.error("failed to start continuous recording: %s", e)
 								        _debug(f"start_continuous: rec.start raised {type(e).__name__}: {e}")
 								        with _continuous_lock:
 								            # Roll back everything registered above, not just the
 								            # active flag, so a failed start leaves the same state
 								            # stop_continuous() would (no stale callbacks kept alive).
 								            _continuous_active = False
 								            _continuous_on_transcript = None
 								            _continuous_on_status = None
 								            _continuous_on_silent_limit = None
 								        raise
 								    if on_status:
 								        try:
 								            on_status("listening")
 								        except Exception as e:
 								            # Best-effort UI notification: never let a callback bug
 								            # take down the loop, but leave a trace for debugging.
 								            logger.warning("on_status callback raised: %s", e)
 								def stop_continuous() -> None:
 								    """Stop the active continuous loop and release the microphone.

 								    Idempotent — calling while not active is a no-op. Any in-flight
 								    transcription completes but its result is discarded (the callback
 								    checks ``_continuous_active`` before firing).

 								    Clears the registered callbacks and the no-speech counter, cancels
 								    the recorder, plays the stop cue, and finally reports ``"idle"``
 								    through the status callback that was registered when the loop
 								    started.
 								    """
 								    global _continuous_active, _continuous_on_transcript
 								    global _continuous_on_status, _continuous_on_silent_limit
 								    global _continuous_recorder, _continuous_no_speech_count
 								    with _continuous_lock:
 								        if not _continuous_active:
 								            return
 								        _continuous_active = False
 								        # Snapshot what is needed after the lock is released; the
 								        # globals themselves are reset right here.
 								        rec = _continuous_recorder
 								        on_status = _continuous_on_status
 								        _continuous_on_transcript = None
 								        _continuous_on_status = None
 								        _continuous_on_silent_limit = None
 								        _continuous_no_speech_count = 0
 								    if rec is not None:
 								        try:
 								            # cancel() (not stop()) discards buffered frames — the loop
 								            # is over, we don't want to transcribe a half-captured turn.
 								            rec.cancel()
 								        except Exception as e:
 								            logger.warning("failed to cancel recorder: %s", e)
 								    # Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the
 								    # silence-auto-stop path plays).
 								    _play_beep(frequency=660, count=2)
 								    if on_status:
 								        try:
 								            on_status("idle")
 								        except Exception as e:
 								            # Best-effort UI notification: the loop is already torn
 								            # down, so just record the callback failure.
 								            logger.warning("on_status callback raised: %s", e)
 								def is_continuous_active() -> bool:
 								    """Report whether the continuous voice loop is currently marked active.

 								    The flag is read while holding ``_continuous_lock`` so the answer is
 								    consistent with concurrent start/stop calls.
 								    """
 								    with _continuous_lock:
 								        active = _continuous_active
 								    return active
 								def _continuous_on_silence() -> None:
 								    """AudioRecorder silence callback — runs in a daemon thread.
 								    Stops the current capture, transcribes, delivers the text via
 								    ``on_transcript``, and — if the loop is still active — starts the
 								    next capture. Three consecutive silent cycles end the loop.
 								    """
 								    global _continuous_active, _continuous_no_speech_count
 								    _debug("_continuous_on_silence: fired")
 								    with _continuous_lock:
 								        if not _continuous_active:
 								            _debug("_continuous_on_silence: loop inactive — abort")
 								            return
 								        rec = _continuous_recorder
 								        on_transcript = _continuous_on_transcript
 								        on_status = _continuous_on_status
 								        on_silent_limit = _continuous_on_silent_limit
 								    if rec is None:
 								        _debug("_continuous_on_silence: no recorder — abort")
 								        return
 								    if on_status:
 								        try:
 								            on_status("transcribing")
 								        except Exception:
 								            pass
 								    wav_path = rec.stop()
 								    # Peak RMS is the critical diagnostic when stop() returns None despite
 								    # the VAD firing — tells us at a glance whether the mic was too quiet
 								    # for SILENCE_RMS_THRESHOLD (200) or the VAD + peak checks disagree.
 								    peak_rms = getattr(rec, "_peak_rms", -1)
 								    _debug(
 								        f"_continuous_on_silence: rec.stop -> {wav_path!r} (peak_rms={peak_rms})"
 								    )
 								    # CLI parity: double 660 Hz beep after the stream stops (safe from the
 								    # CoreAudio conflict that blocks pre-start beeps).
 								    _play_beep(frequency=660, count=2)
 								    transcript: Optional[str] = None
 								    if wav_path:
 								        try:
 								            result = transcribe_recording(wav_path)
 								            # transcribe_recording returns {"success": bool, "transcript": str,
 								            # "error": str?} — NOT {"text": str}.  Using the wrong key silently
 								            # produced empty transcripts even when Groq/local STT returned fine,
 								            # which masqueraded as "not hearing the user" to the caller.
 								            success = bool(result.get("success"))
 								            text = (result.get("transcript") or "").strip()
 								            err = result.get("error")
 								            _debug(
 								                f"_continuous_on_silence: transcribe -> success={success} "
 								                f"text={text!r} err={err!r}"
 								            )
 								            if success and text and not is_whisper_hallucination(text):
 								                transcript = text
 								        except Exception as e:
 								            logger.warning("continuous transcription failed: %s", e)
 								            _debug(f"_continuous_on_silence: transcribe raised {type(e).__name__}: {e}")
 								        finally:
 								            try:
 								                if os.path.isfile(wav_path):
 								                    os.unlink(wav_path)
 								            except Exception:
 								                pass
 								    with _continuous_lock:
 								        if not _continuous_active:
 								            # User stopped us while we were transcribing — discard.
 								            _debug("_continuous_on_silence: stopped during transcribe — no restart")
 								            return
 								        if transcript:
 								            _continuous_no_speech_count = 0
 								        else:
 								            _continuous_no_speech_count += 1
 								        should_halt = _continuous_no_speech_count >= _CONTINUOUS_NO_SPEECH_LIMIT
 								        no_speech = _continuous_no_speech_count
 								    if transcript and on_transcript:
 								        try:
 								            on_transcript(transcript)
 								        except Exception as e:
 								            logger.warning("on_transcript callback raised: %s", e)
 								    if should_halt:
 								        _debug(f"_continuous_on_silence: {no_speech} silent cycles — halting")
 								        with _continuous_lock:
 								            _continuous_active = False
 								            _continuous_no_speech_count = 0
 								        if on_silent_limit:
 								            try:
 								                on_silent_limit()
 								            except Exception:
 								                pass
 								        try:
 								            rec.cancel()
 								        except Exception:
 								            pass
 								        if on_status:
 								            try:
 								                on_status("idle")
 								            except Exception:
 								                pass
 								        return
-												fix(tui): break TTS→STT feedback loop + colorize REC badge

TTS feedback loop (hermes_cli/voice.py)

The VAD loop kept the microphone live while speak_text played the
agent's reply over the speakers, so the reply itself was picked up,
transcribed, and submitted — the agent then replied to its own echo
("Ha, looks like we're in a loop").

Ported cli.py:_voice_tts_done synchronisation:

- _tts_playing: threading.Event (initially set = "not playing").
- speak_text cancels the active recorder before opening the speakers,
  clears _tts_playing, and on exit waits 300 ms before re-starting the
  recorder — long enough for the OS audio device to settle so afplay
  and sounddevice don't race for it.
- _continuous_on_silence now waits on _tts_playing (up to 60 s) before
  re-arming the mic with another 300 ms gap, mirroring
  cli.py:10619-10621.  If the user flips voice off during the wait the
  loop exits cleanly instead of fighting for the device.

Without both halves the loop races: if the silence callback fires
before TTS starts it re-arms immediately; if TTS is already playing
the pause-and-resume path catches it.

Red REC badge (ui-tui appChrome + useMainApp)

Classic CLI (cli.py:_get_voice_status_fragments) renders "● REC" in
red and "◉ STT" in amber.  TUI was showing a dim "REC" with no dot,
making it hard to spot at a glance.  voiceLabel now emits the same
glyphs and appChrome colours them via t.color.error / t.color.warn,
falling back to dim for the idle label.

											
										
										
											2026-04-24 01:33:10 +03:00
+								    # CLI parity (cli.py:10619-10621): wait for any in-flight TTS to
 								    # finish before re-arming the mic, then leave a small gap to avoid
 								    # catching the tail of the speaker output.  Without this the voice
 								    # loop becomes a feedback loop — the agent's spoken reply lands
 								    # back in the mic and gets re-submitted.
 								    if not _tts_playing.is_set():
 								        _debug("_continuous_on_silence: waiting for TTS to finish")
 								        _tts_playing.wait(timeout=60)
 								        import time as _time
 								        _time.sleep(0.3)
 								        # User may have stopped the loop during the wait.
 								        with _continuous_lock:
 								            if not _continuous_active:
 								                _debug("_continuous_on_silence: stopped while waiting for TTS")
 								                return
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								    # Restart for the next turn.
 								    _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
 								    _play_beep(frequency=880, count=1)
 								    try:
 								        rec.start(on_silence_stop=_continuous_on_silence)
 								    except Exception as e:
 								        logger.error("failed to restart continuous recording: %s", e)
 								        _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
 								        with _continuous_lock:
 								            _continuous_active = False
 								        return
 								    if on_status:
 								        try:
 								            on_status("listening")
 								        except Exception:
 								            pass
 								# ── TTS API ──────────────────────────────────────────────────────────
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
def _strip_markdown_for_tts(text: str) -> str:
    """Return ``text`` capped at 4000 chars with markdown syntax stripped for speech."""
    import re

    # Cap first, then strip — same order as the classic CLI pipeline, so the
    # spoken output stays the same for identical input.
    spoken = text[:4000]
    spoken = re.sub(r'```[\s\S]*?```', ' ', spoken)             # fenced code blocks
    spoken = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', spoken)    # [text](url) → text
    spoken = re.sub(r'https?://\S+', '', spoken)                # bare URLs
    spoken = re.sub(r'\*\*(.+?)\*\*', r'\1', spoken)            # bold
    spoken = re.sub(r'\*(.+?)\*', r'\1', spoken)                # italic
    spoken = re.sub(r'`(.+?)`', r'\1', spoken)                  # inline code
    spoken = re.sub(r'^#+\s*', '', spoken, flags=re.MULTILINE)  # headers
    spoken = re.sub(r'^\s*[-*]\s+', '', spoken, flags=re.MULTILINE)  # list bullets
    spoken = re.sub(r'---+', '', spoken)                        # horizontal rules
    spoken = re.sub(r'\n{3,}', '\n\n', spoken)                  # excess newlines
    return spoken.strip()


def speak_text(text: str) -> None:
    """Synthesize ``text`` with the configured TTS provider and play it.

    Ported from cli.py:_voice_speak_response — same markdown strip pipeline,
    same 4000-char cap, an explicit MP3 output path, the same MP3-over-OGG
    playback choice (afplay misbehaves on OGG), and cleanup of both
    extensions.  Two deliberate differences from a literal port: the output
    filename carries a short unique suffix so calls landing in the same
    second cannot share a file, and the temp files are removed on every exit
    path rather than only after successful playback.

    While playback is in flight the module-level _tts_playing Event is
    cleared so the continuous-recording loop knows to wait before
    re-arming the mic (otherwise the agent's spoken reply feedback-loops
    through the microphone and the agent ends up replying to itself).

    Args:
        text: Text to speak.  Empty or whitespace-only input is a no-op,
            as is text that is empty once markdown has been stripped.

    Returns:
        None.  Synthesis and playback failures are logged, never raised.
    """
    if not text or not text.strip():
        return

    import tempfile
    import time
    import uuid

    # Cancel any live capture before we open the speakers — otherwise the
    # last ~200ms of the user's turn tail + the first syllables of our TTS
    # both end up in the next recording window.  The continuous loop will
    # re-arm itself after _tts_playing flips back (see _continuous_on_silence).
    paused_recording = False
    with _continuous_lock:
        if (
            _continuous_active
            and _continuous_recorder is not None
            and getattr(_continuous_recorder, "is_recording", False)
        ):
            try:
                _continuous_recorder.cancel()
                paused_recording = True
            except Exception as e:
                logger.warning("failed to pause recorder for TTS: %s", e)
    _tts_playing.clear()
    _debug(f"speak_text: TTS begin (paused_recording={paused_recording})")

    # Stays None until a path has been chosen, so the cleanup in ``finally``
    # knows whether there is anything that could have been written.
    mp3_path = None
    try:
        from tools.tts_tool import text_to_speech_tool

        tts_text = _strip_markdown_for_tts(text)
        if not tts_text:
            return

        # MP3 output path, pre-chosen so we can play the MP3 directly even
        # when text_to_speech_tool auto-converts to OGG for messaging
        # platforms.  afplay's OGG support is flaky, MP3 always works.
        out_dir = os.path.join(tempfile.gettempdir(), "hermes_voice")
        os.makedirs(out_dir, exist_ok=True)
        # The timestamp alone has one-second resolution; the random suffix
        # keeps overlapping calls from overwriting or unlinking each other's
        # audio file.
        mp3_path = os.path.join(
            out_dir,
            f"tts_{time.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}.mp3",
        )
        _debug(f"speak_text: synthesizing {len(tts_text)} chars -> {mp3_path}")
        text_to_speech_tool(text=tts_text, output_path=mp3_path)
        if os.path.isfile(mp3_path) and os.path.getsize(mp3_path) > 0:
            _debug(f"speak_text: playing {mp3_path} ({os.path.getsize(mp3_path)} bytes)")
            play_audio_file(mp3_path)
        else:
            _debug(f"speak_text: TTS tool produced no audio at {mp3_path}")
    except Exception as e:
        logger.warning("Voice TTS playback failed: %s", e)
        _debug(f"speak_text raised {type(e).__name__}: {e}")
    finally:
        # Remove the MP3 and any OGG sibling on every exit path so a failed
        # synthesis or playback does not leave audio behind in the temp dir.
        # Each file is handled separately so one failure cannot skip the other.
        if mp3_path is not None:
            ogg_path = mp3_path.rsplit(".", 1)[0] + ".ogg"
            for leftover in (mp3_path, ogg_path):
                try:
                    if os.path.isfile(leftover):
                        os.unlink(leftover)
                except OSError as e:
                    _debug(f"speak_text: could not remove {leftover}: {e}")

        _tts_playing.set()
        _debug("speak_text: TTS done")
        # Re-arm the mic so the user can answer without pressing Ctrl+B.
        # Small delay lets the OS flush speaker output and afplay fully
        # release the audio device before sounddevice re-opens the input.
        if paused_recording:
            time.sleep(0.3)
            with _continuous_lock:
                if _continuous_active and _continuous_recorder is not None:
                    try:
                        _continuous_recorder.start(
                            on_silence_stop=_continuous_on_silence
                        )
                        _debug("speak_text: recording resumed after TTS")
                    except Exception as e:
                        logger.warning(
                            "failed to resume recorder after TTS: %s", e
                        )