hermes_cli/voice.py

"""Process-wide voice recording + TTS API for the TUI gateway.

Wraps ``tools.voice_mode`` (recording/transcription) and ``tools.tts_tool``
(text-to-speech) behind idempotent, stateful entry points that the gateway's
``voice.record``, ``voice.toggle``, and ``voice.tts`` JSON-RPC handlers can
call from a dedicated thread. The gateway imports this module lazily so that
missing optional audio deps (sounddevice, faster-whisper, numpy) surface as
an ``ImportError`` at call time, not at startup.

Two usage modes are exposed:

* **Push-to-talk** (``start_recording`` / ``stop_and_transcribe``) — single
  manually-bounded capture used when the caller drives the start/stop pair
  explicitly.
* **Continuous (VAD)** (``start_continuous`` / ``stop_continuous``) — mirrors
  the classic CLI voice mode: recording auto-stops on silence, transcribes,
  hands the result to a callback, and then auto-restarts for the next turn.
  Three consecutive no-speech cycles stop the loop and fire
  ``on_silent_limit`` so the UI can turn the mode off.
"""

from __future__ import annotations

import logging
import os
import sys
import threading
from typing import Any, Callable, Optional

from tools.voice_mode import (
    create_audio_recorder,
    is_whisper_hallucination,
    play_audio_file,
    transcribe_recording,
)

logger = logging.getLogger(__name__)


def _debug(msg: str) -> None:
    """Emit a debug breadcrumb on stderr when ``HERMES_VOICE_DEBUG=1``.

    The message is written to stderr so the TUI gateway wraps it as a
    gateway.stderr event, which createGatewayEventHandler shows as an
    Activity line — enough to diagnose "why didn't the loop auto-restart?"
    in the user's real terminal without shipping a separate debug RPC.

    This fires from background threads (silence callback, TTS daemon,
    beep), so a broken *or closed* stderr must never propagate: the main
    command pipe (stdin+stdout) is what actually matters.

    Args:
        msg: Text to print; it is prefixed with ``[voice] ``.
    """
    if os.environ.get("HERMES_VOICE_DEBUG", "").strip() != "1":
        return
    try:
        print(f"[voice] {msg}", file=sys.stderr, flush=True)
    except (OSError, ValueError):
        # OSError covers BrokenPipeError (its subclass); ValueError is what
        # a closed text stream raises ("I/O operation on closed file").
        pass


def _beeps_enabled() -> bool:
    """Report whether audible cues are on (``voice.beep_enabled``, default True).

    Reads the ``voice`` section of the loaded config. Any failure — the
    config module not importing, the loader raising, the section not being
    a dict — falls back to ``True`` so cues stay on by default.
    """
    enabled = True
    try:
        from hermes_cli.config import load_config

        section = load_config().get("voice", {})
        if isinstance(section, dict):
            enabled = bool(section.get("beep_enabled", True))
    except Exception:
        # Best-effort lookup: an unreadable config must not disable cues.
        enabled = True
    return enabled


def _play_beep(frequency: int, count: int = 1) -> None:
    """Play the record/stop cue used by cli.py, if beeps are enabled.

    Callers in this module use 880 Hz × 1 when a capture starts
    (cli.py:_voice_start_recording line 7532) and 660 Hz × 2 when it stops
    (cli.py:_voice_stop_and_transcribe line 7585).

    Best-effort: any failure while importing or playing the beep is only
    recorded through ``_debug`` so an unavailable speaker never breaks the
    voice loop.

    Args:
        frequency: Tone frequency in Hz, forwarded to ``play_beep``.
        count: Number of beeps, forwarded to ``play_beep``.
    """
    if _beeps_enabled():
        try:
            from tools.voice_mode import play_beep

            play_beep(frequency=frequency, count=count)
        except Exception as exc:
            _debug(f"beep {frequency}Hz failed: {exc}")

# ── Push-to-talk state ───────────────────────────────────────────────
# Recorder for the in-flight push-to-talk capture (None when idle);
# start_recording / stop_and_transcribe touch it under _recorder_lock.
_recorder_lock = threading.Lock()
_recorder: Any = None

# ── Continuous (VAD) state ───────────────────────────────────────────
# Everything below is read and written under _continuous_lock.
_continuous_lock = threading.Lock()
_continuous_active: bool = False
_continuous_recorder: Any = None
# Callbacks handed to start_continuous; cleared again by stop_continuous.
_continuous_on_transcript: Optional[Callable[[str], None]] = None
_continuous_on_status: Optional[Callable[[str], None]] = None
_continuous_on_silent_limit: Optional[Callable[[], None]] = None
# Consecutive cycles without a usable transcript; reaching the limit
# halts the loop.
_CONTINUOUS_NO_SPEECH_LIMIT = 3
_continuous_no_speech_count: int = 0

# ── TTS-vs-STT feedback guard ────────────────────────────────────────
# When TTS plays the agent reply over the speakers, the live microphone
# picks it up and transcribes the agent's own voice as user input — an
# infinite loop the agent happily joins ("Ha, looks like we're in a loop").
# This Event mirrors cli.py:_voice_tts_done: cleared while speak_text is
# playing, set while silent. _continuous_on_silence waits on it before
# re-arming the recorder, and speak_text itself cancels any live capture
# before starting playback so the tail of the previous utterance doesn't
# leak into the mic.
_tts_playing = threading.Event()
_tts_playing.set()  # initially "not playing"


# ── Push-to-talk API ─────────────────────────────────────────────────


def start_recording() -> None:
    """Open a push-to-talk capture on the default input device.

    Idempotent: if the stored recorder reports ``is_recording``, the call
    returns without touching it. Otherwise a fresh recorder is created,
    started, and stored — all under ``_recorder_lock``. The new recorder is
    only stored once ``start()`` has returned, so a failed start leaves the
    previous state in place.
    """
    global _recorder

    with _recorder_lock:
        capturing = _recorder is not None and getattr(
            _recorder, "is_recording", False
        )
        if not capturing:
            fresh = create_audio_recorder()
            fresh.start()
            _recorder = fresh


def stop_and_transcribe() -> Optional[str]:
    """Finish the push-to-talk capture and return its transcript.

    Returns:
        The stripped transcript, or ``None`` when there was no active
        recorder, the recorder produced no audio file, transcription
        raised or reported failure, the transcript was empty, or it
        matched a known Whisper hallucination.
    """
    global _recorder

    # Detach the recorder under the lock; the slow work happens outside it.
    with _recorder_lock:
        active, _recorder = _recorder, None

    if active is None:
        return None

    audio_file = active.stop()
    if not audio_file:
        return None

    try:
        outcome = transcribe_recording(audio_file)
    except Exception as exc:
        logger.warning("voice transcription failed: %s", exc)
        return None
    finally:
        # The capture file is temporary — remove it whatever happened above.
        try:
            if os.path.isfile(audio_file):
                os.unlink(audio_file)
        except Exception:
            pass

    # transcribe_recording returns {"success": bool, "transcript": str, ...}
    # — matches cli.py:_voice_stop_and_transcribe's result.get("transcript").
    if not outcome.get("success"):
        return None

    spoken = (outcome.get("transcript") or "").strip()
    usable = bool(spoken) and not is_whisper_hallucination(spoken)
    return spoken if usable else None


# ── Continuous (VAD) API ─────────────────────────────────────────────


def start_continuous(
    on_transcript: Callable[[str], None],
    on_status: Optional[Callable[[str], None]] = None,
    on_silent_limit: Optional[Callable[[], None]] = None,
    silence_threshold: int = 200,
    silence_duration: float = 3.0,
) -> None:
    """Start a VAD-driven continuous recording loop.

    The loop calls ``on_transcript(text)`` each time speech is detected and
    transcribed successfully, then auto-restarts. After
    ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech
    picked up at all) the loop stops itself and calls ``on_silent_limit``
    so the UI can reflect "voice off". Idempotent — calling while already
    active is a no-op.

    Args:
        on_transcript: Receives each accepted transcript.
        on_status: Optional; called with ``"listening"`` /
            ``"transcribing"`` / ``"idle"`` so the UI can show a live
            indicator. Exceptions it raises are swallowed.
        on_silent_limit: Optional; called when the silent-cycle limit
            halts the loop.
        silence_threshold: Written to the recorder's
            ``_silence_threshold`` attribute.
        silence_duration: Written to the recorder's ``_silence_duration``
            attribute.

    Raises:
        Exception: Whatever ``create_audio_recorder()`` or the recorder's
            ``start()`` raises. In both cases the loop is left inactive so
            a later call can retry.
    """
    global _continuous_active, _continuous_recorder
    global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
    global _continuous_no_speech_count

    with _continuous_lock:
        if _continuous_active:
            _debug("start_continuous: already active — no-op")
            return

        # Build and configure the recorder BEFORE marking the loop active.
        # If recorder creation raises, no state has been touched yet, so the
        # module is not left claiming "active" with nothing recording (which
        # would turn every later start_continuous into a silent no-op).
        if _continuous_recorder is None:
            _continuous_recorder = create_audio_recorder()
        rec = _continuous_recorder
        rec._silence_threshold = silence_threshold
        rec._silence_duration = silence_duration

        _continuous_active = True
        _continuous_on_transcript = on_transcript
        _continuous_on_status = on_status
        _continuous_on_silent_limit = on_silent_limit
        _continuous_no_speech_count = 0

    _debug(
        f"start_continuous: begin (threshold={silence_threshold}, duration={silence_duration}s)"
    )

    # CLI parity: single 880 Hz beep *before* opening the stream — placing
    # the beep after stream.start() on macOS triggers a CoreAudio conflict
    # (cli.py:7528 comment).
    _play_beep(frequency=880, count=1)

    try:
        rec.start(on_silence_stop=_continuous_on_silence)
    except Exception as e:
        logger.error("failed to start continuous recording: %s", e)
        _debug(f"start_continuous: rec.start raised {type(e).__name__}: {e}")
        # Roll the flag back so the caller can retry after fixing the device.
        with _continuous_lock:
            _continuous_active = False
        raise

    if on_status:
        try:
            on_status("listening")
        except Exception:
            # A misbehaving UI callback must not take down the capture.
            pass


def stop_continuous() -> None:
    """Tear down the continuous loop and let go of the microphone.

    Idempotent: returns immediately when no loop is active. Otherwise the
    loop is marked inactive, the stored callbacks and the silent-cycle
    counter are reset, the recorder is cancelled, the stop cue is played,
    and the status callback (captured before it was cleared) receives
    ``"idle"``. An in-flight transcription still completes, but the silence
    callback checks ``_continuous_active`` and drops the result.
    """
    global _continuous_active, _continuous_on_transcript
    global _continuous_on_status, _continuous_on_silent_limit
    global _continuous_recorder, _continuous_no_speech_count

    with _continuous_lock:
        if not _continuous_active:
            return
        # Snapshot what is needed after the lock is released.
        recorder, notify = _continuous_recorder, _continuous_on_status
        _continuous_active = False
        _continuous_no_speech_count = 0
        _continuous_on_transcript = None
        _continuous_on_status = None
        _continuous_on_silent_limit = None

    if recorder is not None:
        # cancel() (not stop()) discards buffered frames — the loop
        # is over, we don't want to transcribe a half-captured turn.
        try:
            recorder.cancel()
        except Exception as exc:
            logger.warning("failed to cancel recorder: %s", exc)

    # Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the
    # silence-auto-stop path plays).
    _play_beep(frequency=660, count=2)

    if notify:
        try:
            notify("idle")
        except Exception:
            pass


def is_continuous_active() -> bool:
    """Return the continuous-loop flag, read under ``_continuous_lock``."""
    with _continuous_lock:
        running = _continuous_active
    return running


def _continuous_on_silence() -> None:
    """AudioRecorder silence callback — runs in a daemon thread.

    Stops the current capture, transcribes it, delivers the text via
    ``on_transcript``, and — if the loop is still active — starts the next
    capture. ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive cycles without a
    usable transcript end the loop and fire ``on_silent_limit``.

    The loop's active flag is re-checked under ``_continuous_lock`` at
    every point where a concurrent ``stop_continuous()`` could have landed
    (after transcription and again right before re-arming), so a stop
    issued while this callback is busy never results in the microphone
    being re-opened. All UI callbacks are invoked best-effort: exceptions
    they raise are swallowed (or logged) and never abort the loop.
    """
    global _continuous_active, _continuous_no_speech_count

    _debug("_continuous_on_silence: fired")

    # Snapshot the recorder and callbacks under the lock; everything slow
    # (stop, beep, transcription, callbacks) runs outside it.
    with _continuous_lock:
        if not _continuous_active:
            _debug("_continuous_on_silence: loop inactive — abort")
            return
        rec = _continuous_recorder
        on_transcript = _continuous_on_transcript
        on_status = _continuous_on_status
        on_silent_limit = _continuous_on_silent_limit

    if rec is None:
        _debug("_continuous_on_silence: no recorder — abort")
        return

    if on_status:
        try:
            on_status("transcribing")
        except Exception:
            pass

    wav_path = rec.stop()
    # Peak RMS is the critical diagnostic when stop() returns None despite
    # the VAD firing — tells us at a glance whether the mic was too quiet
    # for SILENCE_RMS_THRESHOLD (200) or the VAD + peak checks disagree.
    peak_rms = getattr(rec, "_peak_rms", -1)
    _debug(
        f"_continuous_on_silence: rec.stop -> {wav_path!r} (peak_rms={peak_rms})"
    )

    # CLI parity: double 660 Hz beep after the stream stops (safe from the
    # CoreAudio conflict that blocks pre-start beeps).
    _play_beep(frequency=660, count=2)

    transcript: Optional[str] = None

    if wav_path:
        try:
            result = transcribe_recording(wav_path)
            # transcribe_recording returns {"success": bool, "transcript": str,
            # "error": str?} — NOT {"text": str}.  Using the wrong key silently
            # produced empty transcripts even when Groq/local STT returned fine,
            # which masqueraded as "not hearing the user" to the caller.
            success = bool(result.get("success"))
            text = (result.get("transcript") or "").strip()
            err = result.get("error")
            _debug(
                f"_continuous_on_silence: transcribe -> success={success} "
                f"text={text!r} err={err!r}"
            )
            if success and text and not is_whisper_hallucination(text):
                transcript = text
        except Exception as e:
            logger.warning("continuous transcription failed: %s", e)
            _debug(f"_continuous_on_silence: transcribe raised {type(e).__name__}: {e}")
        finally:
            # The capture file is temporary — always try to remove it.
            try:
                if os.path.isfile(wav_path):
                    os.unlink(wav_path)
            except Exception:
                pass

    with _continuous_lock:
        if not _continuous_active:
            # User stopped us while we were transcribing — discard.
            _debug("_continuous_on_silence: stopped during transcribe — no restart")
            return
        if transcript:
            _continuous_no_speech_count = 0
        else:
            _continuous_no_speech_count += 1
        should_halt = _continuous_no_speech_count >= _CONTINUOUS_NO_SPEECH_LIMIT
        no_speech = _continuous_no_speech_count

    if transcript and on_transcript:
        try:
            on_transcript(transcript)
        except Exception as e:
            logger.warning("on_transcript callback raised: %s", e)

    if should_halt:
        _debug(f"_continuous_on_silence: {no_speech} silent cycles — halting")
        with _continuous_lock:
            _continuous_active = False
            _continuous_no_speech_count = 0
        if on_silent_limit:
            try:
                on_silent_limit()
            except Exception:
                pass
        try:
            rec.cancel()
        except Exception:
            pass
        if on_status:
            try:
                on_status("idle")
            except Exception:
                pass
        return

    # CLI parity (cli.py:10619-10621): wait for any in-flight TTS to
    # finish before re-arming the mic, then leave a small gap to avoid
    # catching the tail of the speaker output.  Without this the voice
    # loop becomes a feedback loop — the agent's spoken reply lands
    # back in the mic and gets re-submitted.
    if not _tts_playing.is_set():
        _debug("_continuous_on_silence: waiting for TTS to finish")
        _tts_playing.wait(timeout=60)
        import time as _time
        _time.sleep(0.3)

    # Re-check unconditionally, not only after a TTS wait: the user may
    # also have stopped the loop while on_transcript was running, and
    # re-arming then would re-open the mic with the loop marked inactive.
    with _continuous_lock:
        if not _continuous_active:
            _debug("_continuous_on_silence: stopped before restart — no restart")
            return

    # Restart for the next turn.
    _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
    _play_beep(frequency=880, count=1)
    try:
        rec.start(on_silence_stop=_continuous_on_silence)
    except Exception as e:
        logger.error("failed to restart continuous recording: %s", e)
        _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
        with _continuous_lock:
            _continuous_active = False
        # The loop is dead; tell the UI so it does not stay on the
        # "transcribing" indicator set at the top of this callback.
        if on_status:
            try:
                on_status("idle")
            except Exception:
                pass
        return

    if on_status:
        try:
            on_status("listening")
        except Exception:
            pass


# ── TTS API ──────────────────────────────────────────────────────────


def speak_text(text: str) -> None:
    """Synthesize ``text`` with the configured TTS provider and play it.

    Mirrors cli.py:_voice_speak_response — same markdown strip pipeline,
    same 4000-char cap, same explicit mp3 output path, same MP3-over-OGG
    playback choice (afplay misbehaves on OGG). Keeping these in sync means
    a voice-mode TTS session in the TUI sounds identical to one in the
    classic CLI.

    While playback is in flight the module-level ``_tts_playing`` Event is
    cleared so the continuous-recording loop knows to wait before
    re-arming the mic (otherwise the agent's spoken reply feedback-loops
    through the microphone and the agent ends up replying to itself).

    The temporary MP3 (and any sibling OGG) is removed on every exit path,
    including when synthesis yields an empty file or playback raises.

    Args:
        text: Reply text, possibly containing markdown. Empty or
            whitespace-only input is a no-op.

    Failures while synthesizing or playing are logged, never raised.
    """
    if not text or not text.strip():
        return

    import re
    import tempfile
    import time

    # Cancel any live capture before we open the speakers — otherwise the
    # last ~200ms of the user's turn tail + the first syllables of our TTS
    # both end up in the next recording window.  The continuous loop will
    # re-arm itself after _tts_playing flips back (see _continuous_on_silence).
    paused_recording = False
    with _continuous_lock:
        if (
            _continuous_active
            and _continuous_recorder is not None
            and getattr(_continuous_recorder, "is_recording", False)
        ):
            try:
                _continuous_recorder.cancel()
                paused_recording = True
            except Exception as e:
                logger.warning("failed to pause recorder for TTS: %s", e)

    _tts_playing.clear()
    _debug(f"speak_text: TTS begin (paused_recording={paused_recording})")

    # Set once the output path is chosen, so the cleanup below knows
    # whether there is anything that could have been written.
    mp3_path: Optional[str] = None

    try:
        from tools.tts_tool import text_to_speech_tool

        tts_text = text[:4000]  # hard cap on what is sent to the provider
        tts_text = re.sub(r'```[\s\S]*?```', ' ', tts_text)             # fenced code blocks
        tts_text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', tts_text)    # [text](url) → text
        tts_text = re.sub(r'https?://\S+', '', tts_text)                # bare URLs
        tts_text = re.sub(r'\*\*(.+?)\*\*', r'\1', tts_text)            # bold
        tts_text = re.sub(r'\*(.+?)\*', r'\1', tts_text)                # italic
        tts_text = re.sub(r'`(.+?)`', r'\1', tts_text)                  # inline code
        tts_text = re.sub(r'^#+\s*', '', tts_text, flags=re.MULTILINE)  # headers
        tts_text = re.sub(r'^\s*[-*]\s+', '', tts_text, flags=re.MULTILINE)  # list bullets
        tts_text = re.sub(r'---+', '', tts_text)                        # horizontal rules
        tts_text = re.sub(r'\n{3,}', '\n\n', tts_text)                  # excess newlines
        tts_text = tts_text.strip()
        if not tts_text:
            # Nothing speakable left after stripping markup; the finally
            # block still restores _tts_playing and resumes the mic.
            return

        # MP3 output path, pre-chosen so we can play the MP3 directly even
        # when text_to_speech_tool auto-converts to OGG for messaging
        # platforms.  afplay's OGG support is flaky, MP3 always works.
        out_dir = os.path.join(tempfile.gettempdir(), "hermes_voice")
        os.makedirs(out_dir, exist_ok=True)
        mp3_path = os.path.join(
            out_dir,
            f"tts_{time.strftime('%Y%m%d_%H%M%S')}.mp3",
        )

        _debug(f"speak_text: synthesizing {len(tts_text)} chars -> {mp3_path}")
        text_to_speech_tool(text=tts_text, output_path=mp3_path)

        if os.path.isfile(mp3_path) and os.path.getsize(mp3_path) > 0:
            _debug(f"speak_text: playing {mp3_path} ({os.path.getsize(mp3_path)} bytes)")
            play_audio_file(mp3_path)
        else:
            _debug(f"speak_text: TTS tool produced no audio at {mp3_path}")
    except Exception as e:
        logger.warning("Voice TTS playback failed: %s", e)
        _debug(f"speak_text raised {type(e).__name__}: {e}")
    finally:
        # Remove the temp audio on every path — successful playback, a
        # playback exception, or an empty/missing MP3 — so failed turns do
        # not accumulate files in the temp directory.
        if mp3_path is not None:
            ogg_path = mp3_path.rsplit(".", 1)[0] + ".ogg"
            for leftover in (mp3_path, ogg_path):
                try:
                    if os.path.isfile(leftover):
                        os.unlink(leftover)
                except OSError:
                    pass

        _tts_playing.set()
        _debug("speak_text: TTS done")

        # Re-arm the mic so the user can answer without pressing Ctrl+B.
        # Small delay lets the OS flush speaker output and afplay fully
        # release the audio device before sounddevice re-opens the input.
        if paused_recording:
            time.sleep(0.3)
            with _continuous_lock:
                if _continuous_active and _continuous_recorder is not None:
                    try:
                        _continuous_recorder.start(
                            on_silence_stop=_continuous_on_silence
                        )
                        _debug("speak_text: recording resumed after TTS")
                    except Exception as e:
                        logger.warning(
                            "failed to resume recorder after TTS: %s", e
                        )
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
+								"""Process-wide voice recording + TTS API for the TUI gateway.
 								Wraps ``tools.voice_mode`` (recording/transcription) and ``tools.tts_tool``
 								(text-to-speech) behind idempotent, stateful entry points that the gateway's
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								``voice.record``, ``voice.toggle``, and ``voice.tts`` JSON-RPC handlers can
 								call from a dedicated thread. The gateway imports this module lazily so that
 								missing optional audio deps (sounddevice, faster-whisper, numpy) surface as
 								an ``ImportError`` at call time, not at startup.
 								Two usage modes are exposed:
 								* **Push-to-talk** (``start_recording`` / ``stop_and_transcribe``) — single
 								  manually-bounded capture used when the caller drives the start/stop pair
 								  explicitly.
 								* **Continuous (VAD)** (``start_continuous`` / ``stop_continuous``) — mirrors
 								  the classic CLI voice mode: recording auto-stops on silence, transcribes,
 								  hands the result to a callback, and then auto-restarts for the next turn.
 								  Three consecutive no-speech cycles stop the loop and fire
 								  ``on_silent_limit`` so the UI can turn the mode off.
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
+								"""
 								from __future__ import annotations
 								import logging
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								import os
 								import sys
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
+								import threading
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								from typing import Any, Callable, Optional
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
 								from tools.voice_mode import (
 								    create_audio_recorder,
 								    is_whisper_hallucination,
 								    play_audio_file,
 								    transcribe_recording,
 								)
 								logger = logging.getLogger(__name__)
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
 								def _debug(msg: str) -> None:
 								    """Emit a debug breadcrumb when HERMES_VOICE_DEBUG=1.
 								    Goes to stderr so the TUI gateway wraps it as a gateway.stderr event,
 								    which createGatewayEventHandler shows as an Activity line — exactly
 								    what we need to diagnose "why didn't the loop auto-restart?" in the
 								    user's real terminal without shipping a separate debug RPC.
-												fix(tui): ignore SIGPIPE so stderr back-pressure can't kill the gateway

Crash-log stack trace (tui_gateway_crash.log) from the user's session
pinned the regression: SIGPIPE arrived while main thread was blocked on
for-raw-in-sys.stdin — i.e., a background thread (debug print to stderr,
most likely from HERMES_VOICE_DEBUG=1) wrote to a pipe whose buffer the
TUI hadn't drained yet, and SIG_DFL promptly killed the process.

Two fixes that together restore CLI parity:

- entry.py: SIGPIPE → SIG_IGN instead of the _log_signal handler that
  then exited. With SIG_IGN, Python raises BrokenPipeError on the
  offending write, which write_json already handles with a clean exit
  via _log_exit. SIGTERM / SIGHUP still route through _log_signal so
  real termination signals remain diagnosable.

- hermes_cli/voice.py:_debug: wrap the stderr print in a BrokenPipeError
  / OSError try/except. This runs from daemon threads (silence callback,
  TTS playback, beep), so a broken stderr must not escape and ride up
  into the main event loop.

Verified by spawning the gateway subprocess locally:
  voice.toggle status → 200 OK, process stays alive, clean exit on
  stdin close logs "reason=stdin EOF" instead of a silent reap.

											
										
										
											2026-04-24 01:54:20 +03:00
 								    Any OSError / BrokenPipeError is swallowed because this fires from
 								    background threads (silence callback, TTS daemon, beep) where a
 								    broken stderr pipe must not kill the whole gateway — the main
 								    command pipe (stdin+stdout) is what actually matters.
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								    """
-												fix(tui): ignore SIGPIPE so stderr back-pressure can't kill the gateway

Crash-log stack trace (tui_gateway_crash.log) from the user's session
pinned the regression: SIGPIPE arrived while main thread was blocked on
for-raw-in-sys.stdin — i.e., a background thread (debug print to stderr,
most likely from HERMES_VOICE_DEBUG=1) wrote to a pipe whose buffer the
TUI hadn't drained yet, and SIG_DFL promptly killed the process.

Two fixes that together restore CLI parity:

- entry.py: SIGPIPE → SIG_IGN instead of the _log_signal handler that
  then exited. With SIG_IGN, Python raises BrokenPipeError on the
  offending write, which write_json already handles with a clean exit
  via _log_exit. SIGTERM / SIGHUP still route through _log_signal so
  real termination signals remain diagnosable.

- hermes_cli/voice.py:_debug: wrap the stderr print in a BrokenPipeError
  / OSError try/except. This runs from daemon threads (silence callback,
  TTS playback, beep), so a broken stderr must not escape and ride up
  into the main event loop.

Verified by spawning the gateway subprocess locally:
  voice.toggle status → 200 OK, process stays alive, clean exit on
  stdin close logs "reason=stdin EOF" instead of a silent reap.

											
										
										
											2026-04-24 01:54:20 +03:00
+								    if os.environ.get("HERMES_VOICE_DEBUG", "").strip() != "1":
 								        return
 								    try:
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								        print(f"[voice] {msg}", file=sys.stderr, flush=True)
-												fix(tui): ignore SIGPIPE so stderr back-pressure can't kill the gateway

Crash-log stack trace (tui_gateway_crash.log) from the user's session
pinned the regression: SIGPIPE arrived while main thread was blocked on
for-raw-in-sys.stdin — i.e., a background thread (debug print to stderr,
most likely from HERMES_VOICE_DEBUG=1) wrote to a pipe whose buffer the
TUI hadn't drained yet, and SIG_DFL promptly killed the process.

Two fixes that together restore CLI parity:

- entry.py: SIGPIPE → SIG_IGN instead of the _log_signal handler that
  then exited. With SIG_IGN, Python raises BrokenPipeError on the
  offending write, which write_json already handles with a clean exit
  via _log_exit. SIGTERM / SIGHUP still route through _log_signal so
  real termination signals remain diagnosable.

- hermes_cli/voice.py:_debug: wrap the stderr print in a BrokenPipeError
  / OSError try/except. This runs from daemon threads (silence callback,
  TTS playback, beep), so a broken stderr must not escape and ride up
  into the main event loop.

Verified by spawning the gateway subprocess locally:
  voice.toggle status → 200 OK, process stays alive, clean exit on
  stdin close logs "reason=stdin EOF" instead of a silent reap.

											
										
										
											2026-04-24 01:54:20 +03:00
+								    except (BrokenPipeError, OSError):
 								        pass
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
 								def _beeps_enabled() -> bool:
 								    """CLI parity: voice.beep_enabled in config.yaml (default True)."""
 								    try:
 								        from hermes_cli.config import load_config
 								        voice_cfg = load_config().get("voice", {})
 								        if isinstance(voice_cfg, dict):
 								            return bool(voice_cfg.get("beep_enabled", True))
 								    except Exception:
 								        pass
 								    return True
 								def _play_beep(frequency: int, count: int = 1) -> None:
 								    """Play the audible record/stop cue used by cli.py.
 								
 								    cli.py plays a single beep when recording starts
 								    (``_voice_start_recording``) and a double beep when it stops
 								    (``_voice_stop_and_transcribe``); the caller picks the pitch and
 								    repeat count.
 								
 								    Strictly best-effort: nothing is played when beeps are disabled in
 								    the config, and any playback failure is reported through ``_debug``
 								    and otherwise ignored, so an unavailable speaker can never break
 								    the voice loop.
 								
 								    Args:
 								        frequency: Tone frequency in Hz, forwarded to ``play_beep``.
 								        count: Number of beeps to play, forwarded to ``play_beep``.
 								    """
 								    if _beeps_enabled():
 								        try:
 								            # Lazy import keeps optional audio deps off the import path.
 								            from tools.voice_mode import play_beep
 								
 								            play_beep(frequency=frequency, count=count)
 								        except Exception as exc:
 								            _debug(f"beep {frequency}Hz failed: {exc}")
 								# ── Push-to-talk state ───────────────────────────────────────────────
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
+								_recorder = None
 								_recorder_lock = threading.Lock()
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								# ── Continuous (VAD) state ───────────────────────────────────────────
 								# Lock for the continuous-mode globals — presumably held by
 								# start_continuous / stop_continuous and the silence callback while
 								# they read or mutate the state below (confirm against those functions).
 								_continuous_lock = threading.Lock()
 								# Whether the continuous (VAD) loop is currently running; starts off.
 								_continuous_active = False
 								# Recorder object used by continuous mode; None until one is created.
 								# Typed as Any because the recorder class is not imported here.
 								_continuous_recorder: Any = None
-												fix(tui): break TTS→STT feedback loop + colorize REC badge

TTS feedback loop (hermes_cli/voice.py)

The VAD loop kept the microphone live while speak_text played the
agent's reply over the speakers, so the reply itself was picked up,
transcribed, and submitted — the agent then replied to its own echo
("Ha, looks like we're in a loop").

Ported cli.py:_voice_tts_done synchronisation:

- _tts_playing: threading.Event (initially set = "not playing").
- speak_text cancels the active recorder before opening the speakers,
  clears _tts_playing, and on exit waits 300 ms before re-starting the
  recorder — long enough for the OS audio device to settle so afplay
  and sounddevice don't race for it.
- _continuous_on_silence now waits on _tts_playing (up to 60 s) before
  re-arming the mic with another 300 ms gap, mirroring
  cli.py:10619-10621.  If the user flips voice off during the wait the
  loop exits cleanly instead of fighting for the device.

Without both halves the loop races: if the silence callback fires
before TTS starts it re-arms immediately; if TTS is already playing
the pause-and-resume path catches it.

Red REC badge (ui-tui appChrome + useMainApp)

Classic CLI (cli.py:_get_voice_status_fragments) renders "● REC" in
red and "◉ STT" in amber.  TUI was showing a dim "REC" with no dot,
making it hard to spot at a glance.  voiceLabel now emits the same
glyphs and appChrome colours them via t.color.error / t.color.warn,
falling back to dim for the idle label.

											
										
										
											2026-04-24 01:33:10 +03:00
 								# ── TTS-vs-STT feedback guard ────────────────────────────────────────
 								# When TTS plays the agent reply over the speakers, the live microphone
 								# picks it up and transcribes the agent's own voice as user input — an
 								# infinite loop the agent happily joins ("Ha, looks like we're in a loop").
 								# This Event mirrors cli.py:_voice_tts_done: cleared while speak_text is
 								# playing, set while silent. _continuous_on_silence waits on it before
 								# re-arming the recorder, and speak_text itself cancels any live capture
 								# before starting playback so the tail of the previous utterance doesn't
 								# leak into the mic.
 								_tts_playing = threading.Event()
 								_tts_playing.set()  # initially "not playing"
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
 								# Callback slots for continuous mode; all start unset (None).
 								# Receives the transcribed text of a turn (see module docstring).
+								_continuous_on_transcript: Optional[Callable[[str], None]] = None
 								# Receives a status string — presumably the recording/transcribing
 								# state surfaced to the UI; confirm against the gateway caller.
 								_continuous_on_status: Optional[Callable[[str], None]] = None
 								# Fired with no arguments once the no-speech limit below is reached,
 								# so the UI can turn the mode off.
 								_continuous_on_silent_limit: Optional[Callable[[], None]] = None
 								# Running count of consecutive cycles that produced no speech.
 								_continuous_no_speech_count = 0
 								# Number of consecutive no-speech cycles that stops the loop.
 								_CONTINUOUS_NO_SPEECH_LIMIT = 3
 								# ── Push-to-talk API ─────────────────────────────────────────────────
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
 								def start_recording() -> None:
 								    """Start a push-to-talk capture unless one is already in progress.

 								    Idempotent: when the stored recorder reports ``is_recording`` the call
 								    returns without touching it. Otherwise a new recorder is created,
 								    started, and only then published in the module-level ``_recorder``, so
 								    a failing ``start()`` leaves the previous value in place. The whole
 								    sequence runs while holding ``_recorder_lock``.
 								    """
 								    global _recorder
 								    with _recorder_lock:
 								        current = _recorder
 								        # getattr with a default tolerates recorder objects that do not
 								        # expose an ``is_recording`` attribute at all.
 								        already_recording = current is not None and getattr(
 								            current, "is_recording", False
 								        )
 								        if not already_recording:
 								            fresh = create_audio_recorder()
 								            fresh.start()
 								            _recorder = fresh
 								def stop_and_transcribe() -> Optional[str]:
 								    """Stop the active push-to-talk recording and return its transcript.

 								    Returns:
 								        The stripped transcript text, or ``None`` when any of these holds:
 								        no recorder was stored, ``stop()`` produced no file path,
 								        transcription raised or reported ``success`` as falsy, the
 								        transcript was empty, or ``is_whisper_hallucination`` flagged it.
 								    """
 								    global _recorder
 								    # Detach the recorder while holding the lock; stopping and
 								    # transcribing happen afterwards, outside the lock.
 								    with _recorder_lock:
 								        active, _recorder = _recorder, None
 								    if active is None:
 								        return None
 								    wav_path = active.stop()
 								    if not wav_path:
 								        return None
 								    try:
 								        outcome = transcribe_recording(wav_path)
 								    except Exception as exc:
 								        logger.warning("voice transcription failed: %s", exc)
 								        return None
 								    finally:
 								        # Best-effort removal of the recording file on every exit path;
 								        # a cleanup failure must not mask the transcription result.
 								        try:
 								            if os.path.isfile(wav_path):
 								                os.unlink(wav_path)
 								        except Exception:
 								            pass
 								    # transcribe_recording returns {"success": bool, "transcript": str, ...}
 								    # — matches cli.py:_voice_stop_and_transcribe's result.get("transcript").
 								    if not outcome.get("success"):
 								        return None
 								    transcript = (outcome.get("transcript") or "").strip()
 								    if transcript and not is_whisper_hallucination(transcript):
 								        return transcript
 								    return None
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								# ── Continuous (VAD) API ─────────────────────────────────────────────
 								def start_continuous(
 								    on_transcript: Callable[[str], None],
 								    on_status: Optional[Callable[[str], None]] = None,
 								    on_silent_limit: Optional[Callable[[], None]] = None,
 								    silence_threshold: int = 200,
 								    silence_duration: float = 3.0,
 								) -> None:
 								    """Start a VAD-driven continuous recording loop.
 								    The loop calls ``on_transcript(text)`` each time speech is detected and
 								    transcribed successfully, then auto-restarts. After
 								    ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech
 								    picked up at all) the loop stops itself and calls ``on_silent_limit``
 								    so the UI can reflect "voice off". Idempotent — calling while already
 								    active is a no-op.
 								    ``on_status`` is called with ``"listening"`` / ``"transcribing"`` /
 								    ``"idle"`` so the UI can show a live indicator.
 								    ``silence_threshold`` and ``silence_duration`` are written onto the
 								    shared recorder's ``_silence_threshold`` / ``_silence_duration``
 								    attributes before the capture starts.
 								    Raises whatever ``create_audio_recorder()`` or ``rec.start()`` raise;
 								    in both cases the loop is left inactive so a later call can retry.
 								    """
 								    global _continuous_active, _continuous_recorder
 								    global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
 								    global _continuous_no_speech_count
 								    with _continuous_lock:
 								        if _continuous_active:
 								            _debug("start_continuous: already active — no-op")
 								            return
 								        # Build and configure the recorder *before* committing any loop
 								        # state.  If create_audio_recorder() raises, the exception leaves
 								        # the ``with`` block with _continuous_active still False, so the
 								        # next start attempt is not swallowed by the "already active"
 								        # guard above.
 								        if _continuous_recorder is None:
 								            _continuous_recorder = create_audio_recorder()
 								        rec = _continuous_recorder
 								        rec._silence_threshold = silence_threshold
 								        rec._silence_duration = silence_duration
 								        _continuous_active = True
 								        _continuous_on_transcript = on_transcript
 								        _continuous_on_status = on_status
 								        _continuous_on_silent_limit = on_silent_limit
 								        _continuous_no_speech_count = 0
 								    _debug(
 								        f"start_continuous: begin (threshold={silence_threshold}, duration={silence_duration}s)"
 								    )
 								    # CLI parity: single 880 Hz beep *before* opening the stream — placing
 								    # the beep after stream.start() on macOS triggers a CoreAudio conflict
 								    # (cli.py:7528 comment).
 								    _play_beep(frequency=880, count=1)
 								    try:
 								        rec.start(on_silence_stop=_continuous_on_silence)
 								    except Exception as e:
 								        logger.error("failed to start continuous recording: %s", e)
 								        _debug(f"start_continuous: rec.start raised {type(e).__name__}: {e}")
 								        # Roll the flag back so the caller can retry, then propagate.
 								        with _continuous_lock:
 								            _continuous_active = False
 								        raise
 								    if on_status:
 								        try:
 								            on_status("listening")
 								        except Exception:
 								            # Status reporting is best-effort: the capture is already
 								            # running, so a failing UI hook is deliberately ignored.
 								            pass
 								def stop_continuous() -> None:
 								    """Tear down the continuous loop and free the microphone.
 								    Safe to call repeatedly: when no loop is active it returns at once.
 								    A transcription that is already in flight is allowed to finish, but
 								    the silence callback re-checks ``_continuous_active`` afterwards and
 								    drops the result.
 								    """
 								    global _continuous_active, _continuous_no_speech_count
 								    global _continuous_on_transcript, _continuous_on_status
 								    global _continuous_on_silent_limit
 								    with _continuous_lock:
 								        if not _continuous_active:
 								            return
 								        # Grab what we need for the teardown, then wipe the loop state.
 								        recorder, notify_status = _continuous_recorder, _continuous_on_status
 								        _continuous_active = False
 								        _continuous_no_speech_count = 0
 								        _continuous_on_transcript = None
 								        _continuous_on_status = None
 								        _continuous_on_silent_limit = None
 								    if recorder is not None:
 								        try:
 								            # cancel() rather than stop(): buffered frames are thrown
 								            # away because a half-captured turn must not be transcribed.
 								            recorder.cancel()
 								        except Exception as exc:
 								            logger.warning("failed to cancel recorder: %s", exc)
 								    # "Recording stopped" cue — the same 660 Hz double beep that the
 								    # silence auto-stop path plays (CLI parity).
 								    _play_beep(frequency=660, count=2)
 								    if not notify_status:
 								        return
 								    try:
 								        notify_status("idle")
 								    except Exception:
 								        pass
 								def is_continuous_active() -> bool:
 								    """Report whether the continuous voice loop is running right now."""
 								    with _continuous_lock:
 								        # Read the flag under the lock, hand back the snapshot.
 								        active_now = _continuous_active
 								        return active_now
 								def _continuous_on_silence() -> None:
 								    """AudioRecorder silence callback — runs in a daemon thread.
 								    Stops the current capture, transcribes, delivers the text via
 								    ``on_transcript``, and — if the loop is still active — starts the
 								    next capture. Three consecutive silent cycles end the loop.
 								    A cycle counts as "silent" when no usable transcript was produced:
 								    ``rec.stop()`` returned nothing, transcription failed or raised, the
 								    text was empty, or ``is_whisper_hallucination`` rejected it.
 								    """
 								    global _continuous_active, _continuous_no_speech_count
 								    _debug("_continuous_on_silence: fired")
 								    # Snapshot the loop state under the lock; the slow work below
 								    # (stop, beep, transcription) then runs without holding it.
 								    with _continuous_lock:
 								        if not _continuous_active:
 								            _debug("_continuous_on_silence: loop inactive — abort")
 								            return
 								        rec = _continuous_recorder
 								        on_transcript = _continuous_on_transcript
 								        on_status = _continuous_on_status
 								        on_silent_limit = _continuous_on_silent_limit
 								    if rec is None:
 								        _debug("_continuous_on_silence: no recorder — abort")
 								        return
 								    if on_status:
 								        try:
 								            on_status("transcribing")
 								        except Exception:
 								            # Swallowed so a failing status hook cannot interrupt the cycle.
 								            pass
 								    wav_path = rec.stop()
 								    # Peak RMS is the critical diagnostic when stop() returns None despite
 								    # the VAD firing — tells us at a glance whether the mic was too quiet
 								    # for SILENCE_RMS_THRESHOLD (200) or the VAD + peak checks disagree.
 								    peak_rms = getattr(rec, "_peak_rms", -1)
 								    _debug(
 								        f"_continuous_on_silence: rec.stop -> {wav_path!r} (peak_rms={peak_rms})"
 								    )
 								    # CLI parity: double 660 Hz beep after the stream stops (safe from the
 								    # CoreAudio conflict that blocks pre-start beeps).
 								    _play_beep(frequency=660, count=2)
 								    # Stays None unless a successful, non-empty, non-hallucinated
 								    # transcription comes back.
 								    transcript: Optional[str] = None
 								    if wav_path:
 								        try:
 								            result = transcribe_recording(wav_path)
 								            # transcribe_recording returns {"success": bool, "transcript": str,
 								            # "error": str?} — NOT {"text": str}.  Using the wrong key silently
 								            # produced empty transcripts even when Groq/local STT returned fine,
 								            # which masqueraded as "not hearing the user" to the caller.
 								            success = bool(result.get("success"))
 								            text = (result.get("transcript") or "").strip()
 								            err = result.get("error")
 								            _debug(
 								                f"_continuous_on_silence: transcribe -> success={success} "
 								                f"text={text!r} err={err!r}"
 								            )
 								            if success and text and not is_whisper_hallucination(text):
 								                transcript = text
 								        except Exception as e:
 								            # A transcription failure is logged and treated as a silent
 								            # cycle (transcript stays None) rather than killing the thread.
 								            logger.warning("continuous transcription failed: %s", e)
 								            _debug(f"_continuous_on_silence: transcribe raised {type(e).__name__}: {e}")
 								        finally:
 								            # Best-effort removal of the recording file, whether or not
 								            # transcription succeeded.
 								            try:
 								                if os.path.isfile(wav_path):
 								                    os.unlink(wav_path)
 								            except Exception:
 								                pass
 								    # Re-take the lock to update the silent-cycle counter and to notice
 								    # a stop_continuous() that happened while we were busy above.
 								    with _continuous_lock:
 								        if not _continuous_active:
 								            # User stopped us while we were transcribing — discard.
 								            _debug("_continuous_on_silence: stopped during transcribe — no restart")
 								            return
 								        if transcript:
 								            _continuous_no_speech_count = 0
 								        else:
 								            _continuous_no_speech_count += 1
 								        should_halt = _continuous_no_speech_count >= _CONTINUOUS_NO_SPEECH_LIMIT
 								        no_speech = _continuous_no_speech_count
 								    # Callbacks are invoked outside the lock, using the references
 								    # captured at the top of this function.
 								    if transcript and on_transcript:
 								        try:
 								            on_transcript(transcript)
 								        except Exception as e:
 								            logger.warning("on_transcript callback raised: %s", e)
 								    if should_halt:
 								        # Silent-cycle limit reached: mark the loop inactive, notify the
 								        # caller, drop any buffered audio and report "idle".
 								        _debug(f"_continuous_on_silence: {no_speech} silent cycles — halting")
 								        with _continuous_lock:
 								            _continuous_active = False
 								            _continuous_no_speech_count = 0
 								        if on_silent_limit:
 								            try:
 								                on_silent_limit()
 								            except Exception:
 								                pass
 								        try:
 								            rec.cancel()
 								        except Exception:
 								            pass
 								        if on_status:
 								            try:
 								                on_status("idle")
 								            except Exception:
 								                pass
 								        return
-												fix(tui): break TTS→STT feedback loop + colorize REC badge

TTS feedback loop (hermes_cli/voice.py)

The VAD loop kept the microphone live while speak_text played the
agent's reply over the speakers, so the reply itself was picked up,
transcribed, and submitted — the agent then replied to its own echo
("Ha, looks like we're in a loop").

Ported cli.py:_voice_tts_done synchronisation:

- _tts_playing: threading.Event (initially set = "not playing").
- speak_text cancels the active recorder before opening the speakers,
  clears _tts_playing, and on exit waits 300 ms before re-starting the
  recorder — long enough for the OS audio device to settle so afplay
  and sounddevice don't race for it.
- _continuous_on_silence now waits on _tts_playing (up to 60 s) before
  re-arming the mic with another 300 ms gap, mirroring
  cli.py:10619-10621.  If the user flips voice off during the wait the
  loop exits cleanly instead of fighting for the device.

Without both halves the loop races: if the silence callback fires
before TTS starts it re-arms immediately; if TTS is already playing
the pause-and-resume path catches it.

Red REC badge (ui-tui appChrome + useMainApp)

Classic CLI (cli.py:_get_voice_status_fragments) renders "● REC" in
red and "◉ STT" in amber.  TUI was showing a dim "REC" with no dot,
making it hard to spot at a glance.  voiceLabel now emits the same
glyphs and appChrome colours them via t.color.error / t.color.warn,
falling back to dim for the idle label.

											
										
										
											2026-04-24 01:33:10 +03:00
+								    # CLI parity (cli.py:10619-10621): wait for any in-flight TTS to
 								    # finish before re-arming the mic, then leave a small gap to avoid
 								    # catching the tail of the speaker output.  Without this the voice
 								    # loop becomes a feedback loop — the agent's spoken reply lands
 								    # back in the mic and gets re-submitted.
 								    if not _tts_playing.is_set():
 								        _debug("_continuous_on_silence: waiting for TTS to finish")
 								        _tts_playing.wait(timeout=60)
 								        import time as _time
 								        _time.sleep(0.3)
 								        # User may have stopped the loop during the wait.
 								        with _continuous_lock:
 								            if not _continuous_active:
 								                _debug("_continuous_on_silence: stopped while waiting for TTS")
 								                return
-												feat(tui): match CLI's voice slash + VAD-continuous recording model

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).

											
										
										
											2026-04-24 00:55:17 +03:00
+								    # Restart for the next turn.
 								    _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
 								    _play_beep(frequency=880, count=1)
 								    try:
 								        rec.start(on_silence_stop=_continuous_on_silence)
 								    except Exception as e:
 								        logger.error("failed to restart continuous recording: %s", e)
 								        _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
 								        with _continuous_lock:
 								            _continuous_active = False
 								        return
 								    if on_status:
 								        try:
 								            on_status("listening")
 								        except Exception:
 								            pass
 								# ── TTS API ──────────────────────────────────────────────────────────
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
+								def speak_text(text: str) -> None:
 								    """Synthesize ``text`` with the configured TTS provider and play it.
-												fix(tui): voice TTS speak-back + transcript-key bug + auto-submit

Three issues surfaced during end-to-end testing of the CLI-parity voice
loop and are fixed together because they all blocked "speak → agent
responds → TTS reads it back" from working at all:

1. Wrong result key (hermes_cli/voice.py)

   transcribe_recording() returns {"success": bool, "transcript": str},
   matching cli.py:_voice_stop_and_transcribe. The wrapper was reading
   result.get("text"), which is None, so every successful Groq / local
   STT response was thrown away and the 3-strikes halt fired after
   three silent-looking cycles. Fixed by reading "transcript" and also
   honouring "success" like the CLI does. Updated the loop simulation
   tests to return the correct shape.

2. TTS speak-back was missing (tui_gateway/server.py + hermes_cli/voice.py)

   The TUI had a voice.toggle "tts" subcommand but nothing downstream
   actually read the flag — agent replies never spoke. Mirrored
   cli.py:8747-8754's dispatch: on message.complete with status ==
   "complete", if _voice_tts_enabled() is true, spawn a daemon thread
   running speak_text(response). Rewrote speak_text as a full port of
   cli.py:_voice_speak_response — same markdown-strip regex pipeline
   (code blocks, links, bold/italic, inline code, headers, list bullets,
   horizontal rules, excessive newlines), same 4000-char cap, same
   explicit mp3 output path, same MP3-over-OGG playback choice (afplay
   misbehaves on OGG), same cleanup of both extensions. Keeps TUI TTS
   audible output byte-for-byte identical to the classic CLI.

3. Auto-submit swallowed on non-empty composer (createGatewayEventHandler.ts)

   The voice.transcript handler branched on prev input via a setInput
   updater and fired submitRef.current inside the updater when prev was
   empty. React strict mode double-invokes state updaters, which would
   queue the submit twice; and when the composer had any content the
   transcript was merely appended — the agent never saw it. CLI
   _pending_input.put(transcript) unconditionally feeds the transcript
   as the next turn, so match that: always clear the composer and
   setTimeout(() => submitRef.current(text), 0) outside any updater.
   Side effect can't run twice this way, and a half-typed draft on the
   rare occasion is a fair trade vs. silently dropping the turn.

Also added peak_rms to the rec.stop debug line so "recording too quiet"
is diagnosable at a glance when HERMES_VOICE_DEBUG=1.

											
										
										
											2026-04-24 01:27:19 +03:00
+								    Mirrors cli.py:_voice_speak_response exactly — same markdown strip
 								    pipeline, same 4000-char cap, same explicit mp3 output path, same
 								    MP3-over-OGG playback choice (afplay misbehaves on OGG), same cleanup
 								    of both extensions. Keeping these in sync means a voice-mode TTS
 								    session in the TUI sounds identical to one in the classic CLI.
-												fix(tui): break TTS→STT feedback loop + colorize REC badge

TTS feedback loop (hermes_cli/voice.py)

The VAD loop kept the microphone live while speak_text played the
agent's reply over the speakers, so the reply itself was picked up,
transcribed, and submitted — the agent then replied to its own echo
("Ha, looks like we're in a loop").

Ported cli.py:_voice_tts_done synchronisation:

- _tts_playing: threading.Event (initially set = "not playing").
- speak_text cancels the active recorder before opening the speakers,
  clears _tts_playing, and on exit waits 300 ms before re-starting the
  recorder — long enough for the OS audio device to settle so afplay
  and sounddevice don't race for it.
- _continuous_on_silence now waits on _tts_playing (up to 60 s) before
  re-arming the mic with another 300 ms gap, mirroring
  cli.py:10619-10621.  If the user flips voice off during the wait the
  loop exits cleanly instead of fighting for the device.

Without both halves the loop races: if the silence callback fires
before TTS starts it re-arms immediately; if TTS is already playing
the pause-and-resume path catches it.

Red REC badge (ui-tui appChrome + useMainApp)

Classic CLI (cli.py:_get_voice_status_fragments) renders "● REC" in
red and "◉ STT" in amber.  TUI was showing a dim "REC" with no dot,
making it hard to spot at a glance.  voiceLabel now emits the same
glyphs and appChrome colours them via t.color.error / t.color.warn,
falling back to dim for the idle label.

											
										
										
											2026-04-24 01:33:10 +03:00
 								    While playback is in flight the module-level _tts_playing Event is
 								    cleared so the continuous-recording loop knows to wait before
 								    re-arming the mic (otherwise the agent's spoken reply feedback-loops
 								    through the microphone and the agent ends up replying to itself).
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
+								    """
 								    if not text or not text.strip():
 								        return
-												fix(tui): voice TTS speak-back + transcript-key bug + auto-submit

Three issues surfaced during end-to-end testing of the CLI-parity voice
loop and are fixed together because they all blocked "speak → agent
responds → TTS reads it back" from working at all:

1. Wrong result key (hermes_cli/voice.py)

   transcribe_recording() returns {"success": bool, "transcript": str},
   matching cli.py:_voice_stop_and_transcribe. The wrapper was reading
   result.get("text"), which is None, so every successful Groq / local
   STT response was thrown away and the 3-strikes halt fired after
   three silent-looking cycles. Fixed by reading "transcript" and also
   honouring "success" like the CLI does. Updated the loop simulation
   tests to return the correct shape.

2. TTS speak-back was missing (tui_gateway/server.py + hermes_cli/voice.py)

   The TUI had a voice.toggle "tts" subcommand but nothing downstream
   actually read the flag — agent replies never spoke. Mirrored
   cli.py:8747-8754's dispatch: on message.complete with status ==
   "complete", if _voice_tts_enabled() is true, spawn a daemon thread
   running speak_text(response). Rewrote speak_text as a full port of
   cli.py:_voice_speak_response — same markdown-strip regex pipeline
   (code blocks, links, bold/italic, inline code, headers, list bullets,
   horizontal rules, excessive newlines), same 4000-char cap, same
   explicit mp3 output path, same MP3-over-OGG playback choice (afplay
   misbehaves on OGG), same cleanup of both extensions. Keeps TUI TTS
   audible output byte-for-byte identical to the classic CLI.

3. Auto-submit swallowed on non-empty composer (createGatewayEventHandler.ts)

   The voice.transcript handler branched on prev input via a setInput
   updater and fired submitRef.current inside the updater when prev was
   empty. React strict mode double-invokes state updaters, which would
   queue the submit twice; and when the composer had any content the
   transcript was merely appended — the agent never saw it. CLI
   _pending_input.put(transcript) unconditionally feeds the transcript
   as the next turn, so match that: always clear the composer and
   setTimeout(() => submitRef.current(text), 0) outside any updater.
   Side effect can't run twice this way, and a half-typed draft on the
   rare occasion is a fair trade vs. silently dropping the turn.

Also added peak_rms to the rec.stop debug line so "recording too quiet"
is diagnosable at a glance when HERMES_VOICE_DEBUG=1.

											
										
										
											2026-04-24 01:27:19 +03:00
+								    import re
 								    import tempfile
 								    import time
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
-												fix(tui): break TTS→STT feedback loop + colorize REC badge

TTS feedback loop (hermes_cli/voice.py)

The VAD loop kept the microphone live while speak_text played the
agent's reply over the speakers, so the reply itself was picked up,
transcribed, and submitted — the agent then replied to its own echo
("Ha, looks like we're in a loop").

Ported cli.py:_voice_tts_done synchronisation:

- _tts_playing: threading.Event (initially set = "not playing").
- speak_text cancels the active recorder before opening the speakers,
  clears _tts_playing, and on exit waits 300 ms before re-starting the
  recorder — long enough for the OS audio device to settle so afplay
  and sounddevice don't race for it.
- _continuous_on_silence now waits on _tts_playing (up to 60 s) before
  re-arming the mic with another 300 ms gap, mirroring
  cli.py:10619-10621.  If the user flips voice off during the wait the
  loop exits cleanly instead of fighting for the device.

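Both halves are needed, otherwise the loop races: if the silence
callback fires before TTS starts it re-arms the mic immediately, and
speak_text's cancel-and-resume path catches that; if TTS is already
playing, the callback's wait on _tts_playing holds the re-arm until
playback ends.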

Red REC badge (ui-tui appChrome + useMainApp)

Classic CLI (cli.py:_get_voice_status_fragments) renders "● REC" in
red and "◉ STT" in amber.  TUI was showing a dim "REC" with no dot,
making it hard to spot at a glance.  voiceLabel now emits the same
glyphs and appChrome colours them via t.color.error / t.color.warn,
falling back to dim for the idle label.

											
										
										
											2026-04-24 01:33:10 +03:00
+								    # Cancel any live capture before we open the speakers — otherwise the
 								    # last ~200ms of the user's turn tail + the first syllables of our TTS
 								    # both end up in the next recording window.  The continuous loop will
 								    # re-arm itself after _tts_playing flips back (see _continuous_on_silence).
 								    paused_recording = False
 								    with _continuous_lock:
 								        if (
 								            _continuous_active
 								            and _continuous_recorder is not None
 								            and getattr(_continuous_recorder, "is_recording", False)
 								        ):
 								            try:
 								                _continuous_recorder.cancel()
 								                paused_recording = True
 								            except Exception as e:
 								                logger.warning("failed to pause recorder for TTS: %s", e)
 								    _tts_playing.clear()
 								    _debug(f"speak_text: TTS begin (paused_recording={paused_recording})")
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
+								    try:
-												fix(tui): voice TTS speak-back + transcript-key bug + auto-submit

Three issues surfaced during end-to-end testing of the CLI-parity voice
loop and are fixed together because they all blocked "speak → agent
responds → TTS reads it back" from working at all:

1. Wrong result key (hermes_cli/voice.py)

   transcribe_recording() returns {"success": bool, "transcript": str},
   matching cli.py:_voice_stop_and_transcribe. The wrapper was reading
   result.get("text"), which is None, so every successful Groq / local
   STT response was thrown away and the 3-strikes halt fired after
   three silent-looking cycles. Fixed by reading "transcript" and also
   honouring "success" like the CLI does. Updated the loop simulation
   tests to return the correct shape.

2. TTS speak-back was missing (tui_gateway/server.py + hermes_cli/voice.py)

   The TUI had a voice.toggle "tts" subcommand but nothing downstream
   actually read the flag — agent replies never spoke. Mirrored
   cli.py:8747-8754's dispatch: on message.complete with status ==
   "complete", if _voice_tts_enabled() is true, spawn a daemon thread
   running speak_text(response). Rewrote speak_text as a full port of
   cli.py:_voice_speak_response — same markdown-strip regex pipeline
   (code blocks, links, bold/italic, inline code, headers, list bullets,
   horizontal rules, excessive newlines), same 4000-char cap, same
   explicit mp3 output path, same MP3-over-OGG playback choice (afplay
   misbehaves on OGG), same cleanup of both extensions. Keeps TUI TTS
   audible output byte-for-byte identical to the classic CLI.

3. Auto-submit swallowed on non-empty composer (createGatewayEventHandler.ts)

   The voice.transcript handler branched on prev input via a setInput
   updater and fired submitRef.current inside the updater when prev was
   empty. React strict mode double-invokes state updaters, which would
   queue the submit twice; and when the composer had any content the
   transcript was merely appended — the agent never saw it. CLI
   _pending_input.put(transcript) unconditionally feeds the transcript
   as the next turn, so match that: always clear the composer and
   setTimeout(() => submitRef.current(text), 0) outside any updater.
   The side effect can't run twice this way, and losing a half-typed
   draft on the rare occasion is a fair trade vs. silently dropping the
   turn.

Also added peak_rms to the rec.stop debug line so "recording too quiet"
is diagnosable at a glance when HERMES_VOICE_DEBUG=1.

											
										
										
											2026-04-24 01:27:19 +03:00
+								        from tools.tts_tool import text_to_speech_tool
 								        tts_text = text[:4000] if len(text) > 4000 else text
 								        tts_text = re.sub(r'```[\s\S]*?```', ' ', tts_text)             # fenced code blocks
 								        tts_text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', tts_text)    # [text](url) → text
 								        tts_text = re.sub(r'https?://\S+', '', tts_text)                # bare URLs
 								        tts_text = re.sub(r'\*\*(.+?)\*\*', r'\1', tts_text)            # bold
 								        tts_text = re.sub(r'\*(.+?)\*', r'\1', tts_text)                # italic
 								        tts_text = re.sub(r'`(.+?)`', r'\1', tts_text)                  # inline code
 								        tts_text = re.sub(r'^#+\s*', '', tts_text, flags=re.MULTILINE)  # headers
 								        tts_text = re.sub(r'^\s*[-*]\s+', '', tts_text, flags=re.MULTILINE)  # list bullets
 								        tts_text = re.sub(r'---+', '', tts_text)                        # horizontal rules
 								        tts_text = re.sub(r'\n{3,}', '\n\n', tts_text)                  # excess newlines
 								        tts_text = tts_text.strip()
 								        if not tts_text:
 								            return
-												fix(tui): add missing hermes_cli.voice wrapper for gateway RPC

tui_gateway/server.py:3486/3491/3509 imports start_recording,
stop_and_transcribe, and speak_text from hermes_cli.voice, but the
module never existed (not in git history — never shipped, never
deleted). Every voice.record / voice.tts RPC call hit the ImportError
branch and the TUI surfaced it as "voice module not available — install
audio dependencies" even on boxes with sounddevice / faster-whisper /
numpy installed.

Adds a thin wrapper on top of tools.voice_mode (recording +
transcription) and tools.tts_tool (text-to-speech):

- start_recording() — idempotent; stores the active AudioRecorder in a
  module-global guarded by a Lock so repeat Ctrl+B presses don't fight
  over the mic.
- stop_and_transcribe() — returns None for no-op / no-speech /
  Whisper-hallucination cases so the TUI's existing "no speech detected"
  path keeps working unchanged.
- speak_text(text) — lazily imports tts_tool (optional provider SDKs
  stay unloaded until the first /voice tts call), parses the tool's
  JSON result, and plays the audio via play_audio_file.

Paired with the Ctrl+B keybinding fix in the prior commit, the TUI
voice pipeline now works end-to-end for the first time.

											
										
										
											2026-04-24 00:21:59 +03:00
-												fix(tui): voice TTS speak-back + transcript-key bug + auto-submit

Three issues surfaced during end-to-end testing of the CLI-parity voice
loop and are fixed together because they all blocked "speak → agent
responds → TTS reads it back" from working at all:

1. Wrong result key (hermes_cli/voice.py)

   transcribe_recording() returns {"success": bool, "transcript": str},
   matching cli.py:_voice_stop_and_transcribe. The wrapper was reading
   result.get("text"), which is None, so every successful Groq / local
   STT response was thrown away and the 3-strikes halt fired after
   three silent-looking cycles. Fixed by reading "transcript" and also
   honouring "success" like the CLI does. Updated the loop simulation
   tests to return the correct shape.

2. TTS speak-back was missing (tui_gateway/server.py + hermes_cli/voice.py)

   The TUI had a voice.toggle "tts" subcommand but nothing downstream
   actually read the flag — agent replies never spoke. Mirrored
   cli.py:8747-8754's dispatch: on message.complete with status ==
   "complete", if _voice_tts_enabled() is true, spawn a daemon thread
   running speak_text(response). Rewrote speak_text as a full port of
   cli.py:_voice_speak_response — same markdown-strip regex pipeline
   (code blocks, links, bold/italic, inline code, headers, list bullets,
   horizontal rules, excessive newlines), same 4000-char cap, same
   explicit mp3 output path, same MP3-over-OGG playback choice (afplay
   misbehaves on OGG), same cleanup of both extensions. Keeps TUI TTS
   audible output byte-for-byte identical to the classic CLI.

3. Auto-submit swallowed on non-empty composer (createGatewayEventHandler.ts)

   The voice.transcript handler branched on prev input via a setInput
   updater and fired submitRef.current inside the updater when prev was
   empty. React strict mode double-invokes state updaters, which would
   queue the submit twice; and when the composer had any content the
   transcript was merely appended — the agent never saw it. CLI
   _pending_input.put(transcript) unconditionally feeds the transcript
   as the next turn, so match that: always clear the composer and
   setTimeout(() => submitRef.current(text), 0) outside any updater.
   The side effect can't run twice this way, and losing a half-typed
   draft on the rare occasion is a fair trade vs. silently dropping the
   turn.

Also added peak_rms to the rec.stop debug line so "recording too quiet"
is diagnosable at a glance when HERMES_VOICE_DEBUG=1.

											
										
										
											2026-04-24 01:27:19 +03:00
+								        # MP3 output path, pre-chosen so we can play the MP3 directly even
 								        # when text_to_speech_tool auto-converts to OGG for messaging
 								        # platforms.  afplay's OGG support is flaky, MP3 always works.
 								        os.makedirs(os.path.join(tempfile.gettempdir(), "hermes_voice"), exist_ok=True)
 								        mp3_path = os.path.join(
 								            tempfile.gettempdir(),
 								            "hermes_voice",
 								            f"tts_{time.strftime('%Y%m%d_%H%M%S')}.mp3",
 								        )
 								        _debug(f"speak_text: synthesizing {len(tts_text)} chars -> {mp3_path}")
 								        text_to_speech_tool(text=tts_text, output_path=mp3_path)
 								        if os.path.isfile(mp3_path) and os.path.getsize(mp3_path) > 0:
 								            _debug(f"speak_text: playing {mp3_path} ({os.path.getsize(mp3_path)} bytes)")
 								            play_audio_file(mp3_path)
 								            try:
 								                os.unlink(mp3_path)
 								                ogg_path = mp3_path.rsplit(".", 1)[0] + ".ogg"
 								                if os.path.isfile(ogg_path):
 								                    os.unlink(ogg_path)
 								            except OSError:
 								                pass
 								        else:
 								            _debug(f"speak_text: TTS tool produced no audio at {mp3_path}")
 								    except Exception as e:
 								        logger.warning("Voice TTS playback failed: %s", e)
 								        _debug(f"speak_text raised {type(e).__name__}: {e}")
-												fix(tui): break TTS→STT feedback loop + colorize REC badge

TTS feedback loop (hermes_cli/voice.py)

The VAD loop kept the microphone live while speak_text played the
agent's reply over the speakers, so the reply itself was picked up,
transcribed, and submitted — the agent then replied to its own echo
("Ha, looks like we're in a loop").

Ported cli.py:_voice_tts_done synchronisation:

- _tts_playing: threading.Event (initially set = "not playing").
- speak_text cancels the active recorder before opening the speakers,
  clears _tts_playing, and on exit waits 300 ms before re-starting the
  recorder — long enough for the OS audio device to settle so afplay
  and sounddevice don't race for it.
- _continuous_on_silence now waits on _tts_playing (up to 60 s) before
  re-arming the mic with another 300 ms gap, mirroring
  cli.py:10619-10621.  If the user flips voice off during the wait the
  loop exits cleanly instead of fighting for the device.

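Both halves are needed, otherwise the loop races: if the silence
callback fires before TTS starts it re-arms the mic immediately, and
speak_text's cancel-and-resume path catches that; if TTS is already
playing, the callback's wait on _tts_playing holds the re-arm until
playback ends.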

Red REC badge (ui-tui appChrome + useMainApp)

Classic CLI (cli.py:_get_voice_status_fragments) renders "● REC" in
red and "◉ STT" in amber.  TUI was showing a dim "REC" with no dot,
making it hard to spot at a glance.  voiceLabel now emits the same
glyphs and appChrome colours them via t.color.error / t.color.warn,
falling back to dim for the idle label.

											
										
										
											2026-04-24 01:33:10 +03:00
+								    finally:
 								        _tts_playing.set()
 								        _debug("speak_text: TTS done")
 								        # Re-arm the mic so the user can answer without pressing Ctrl+B.
 								        # Small delay lets the OS flush speaker output and afplay fully
 								        # release the audio device before sounddevice re-opens the input.
 								        if paused_recording:
 								            time.sleep(0.3)
 								            with _continuous_lock:
 								                if _continuous_active and _continuous_recorder is not None:
 								                    try:
 								                        _continuous_recorder.start(
 								                            on_silence_stop=_continuous_on_silence
 								                        )
 								                        _debug("speak_text: recording resumed after TTS")
 								                    except Exception as e:
 								                        logger.warning(
 								                            "failed to resume recorder after TTS: %s", e
 								                        )