feat(tts): add command-type provider registry under tts.providers.<name> (#17843)
Reshape of PR #17211 (@versun). Lets users wire any local or external
TTS CLI into Hermes without adding engine-specific Python code. Users
declare any number of named providers in config.yaml and switch between
them with tts.provider: <name>, alongside the built-ins (edge, openai,
elevenlabs, …).
Config shape:
tts:
provider: piper-en
providers:
piper-en:
type: command
command: 'piper -m ~/model.onnx -f {output_path} < {input_path}'
output_format: wav
Placeholders: {input_path}, {text_path}, {output_path}, {format},
{voice}, {model}, {speed}. Use {{ / }} for literal braces.
Key behavior:
- Built-in provider names always win — a tts.providers.openai entry
cannot shadow the native OpenAI provider.
- type: command is the default when command: is set.
- Placeholder values are shell-quote-aware (bare / single / double
context), so paths with spaces and shell metacharacters are safe.
- Default delivery is a regular audio attachment. voice_compatible: true
opts in to Telegram voice-bubble delivery via ffmpeg Opus conversion.
- Command failures (non-zero exit, timeout, empty output) surface to
the agent with stderr/stdout included so you can debug from chat.
- Process-tree kill on timeout (Unix killpg, Windows taskkill /T).
- max_text_length defaults to 5000 for command providers; override
under tts.providers.<name>.max_text_length.
Tests: tests/tools/test_tts_command_providers.py — 42 new tests cover
provider resolution, shell-quote context, placeholder rendering with
injection payloads, timeout, non-zero exit, empty output, voice_compatible
opt-in, and end-to-end dispatch through text_to_speech_tool. All 88
pre-existing TTS tests still pass.
Docs: new "Custom command providers" section in
website/docs/user-guide/features/tts.md with three worked examples
(Piper, VoxCPM, MLX-Kokoro), placeholder reference, optional keys,
behavior notes, and security caveat.
E2E-verified live: isolated HERMES_HOME, command provider declared in
config.yaml, text_to_speech_tool dispatches through the registered
shell command and the output file is produced as expected.
Co-authored-by: Versun <me+github7604@versun.org>
2026-04-30 02:29:08 -07:00
|
|
|
"""
|
|
|
|
|
Tests for custom command-type TTS providers.
|
|
|
|
|
|
|
|
|
|
These tests cover the ``tts.providers.<name>`` registry: built-in
|
|
|
|
|
precedence, command resolution, placeholder rendering, shell-quote
|
|
|
|
|
context handling, timeout / failure cleanup, voice_compatible opt-in,
|
|
|
|
|
and max_text_length lookup.
|
|
|
|
|
|
|
|
|
|
Nothing here talks to a real TTS engine. The shell command itself is
|
|
|
|
|
portable: we write bytes to ``{output_path}`` using ``python -c`` so
|
|
|
|
|
the tests run identically on Linux, macOS, and (with minor quoting
|
|
|
|
|
differences) Windows.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
import os
|
|
|
|
|
import subprocess
|
|
|
|
|
import sys
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
from typing import Optional
|
|
|
|
|
from unittest.mock import patch
|
|
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
|
|
from tools.tts_tool import (
|
|
|
|
|
BUILTIN_TTS_PROVIDERS,
|
|
|
|
|
COMMAND_TTS_OUTPUT_FORMATS,
|
|
|
|
|
DEFAULT_COMMAND_TTS_MAX_TEXT_LENGTH,
|
|
|
|
|
DEFAULT_COMMAND_TTS_OUTPUT_FORMAT,
|
|
|
|
|
DEFAULT_COMMAND_TTS_TIMEOUT_SECONDS,
|
|
|
|
|
_generate_command_tts,
|
|
|
|
|
_get_command_tts_output_format,
|
|
|
|
|
_get_command_tts_timeout,
|
|
|
|
|
_get_named_provider_config,
|
|
|
|
|
_has_any_command_tts_provider,
|
|
|
|
|
_is_command_provider_config,
|
|
|
|
|
_is_command_tts_voice_compatible,
|
|
|
|
|
_iter_command_providers,
|
|
|
|
|
_render_command_tts_template,
|
|
|
|
|
_resolve_command_provider_config,
|
|
|
|
|
_resolve_max_text_length,
|
|
|
|
|
_shell_quote_context,
|
|
|
|
|
check_tts_requirements,
|
|
|
|
|
text_to_speech_tool,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Helpers
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
def _python_copy_command(output_placeholder: str = "{output_path}") -> str:
|
|
|
|
|
"""Return a cross-platform shell command that copies {input_path} -> output."""
|
|
|
|
|
interpreter = sys.executable
|
|
|
|
|
return (
|
|
|
|
|
f'"{interpreter}" -c "import shutil, sys; '
|
|
|
|
|
f'shutil.copyfile(sys.argv[1], sys.argv[2])" '
|
|
|
|
|
f'{{input_path}} {output_placeholder}'
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# _resolve_command_provider_config / built-in precedence
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
class TestResolveCommandProviderConfig:
|
|
|
|
|
def test_builtin_names_are_never_command_providers(self):
|
|
|
|
|
cfg = {
|
|
|
|
|
"providers": {
|
|
|
|
|
"openai": {"type": "command", "command": "echo hi"},
|
|
|
|
|
"edge": {"type": "command", "command": "echo hi"},
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
for name in BUILTIN_TTS_PROVIDERS:
|
|
|
|
|
assert _resolve_command_provider_config(name, cfg) is None
|
|
|
|
|
|
|
|
|
|
def test_missing_provider_returns_none(self):
|
|
|
|
|
cfg = {"providers": {}}
|
|
|
|
|
assert _resolve_command_provider_config("nope", cfg) is None
|
|
|
|
|
|
|
|
|
|
def test_user_declared_command_provider_resolves(self):
|
|
|
|
|
cfg = {
|
|
|
|
|
"providers": {
|
feat(tts): add Piper as a native local TTS provider (closes #8508) (#17885)
Piper (OHF-Voice/piper1-gpl) is a fast, local neural TTS engine from the
Home Assistant project that supports 44 languages with zero API keys.
Adds it as a native built-in provider alongside edge/neutts/kittentts,
installable via 'hermes tools' with one keystroke.
What ships:
- New 'piper' built-in provider in tools/tts_tool.py
- Lazy import via _import_piper()
- Module-level voice cache keyed on (model_path, use_cuda) so switching
voices doesn't invalidate older cached voices
- _resolve_piper_voice_path() accepts either an absolute .onnx path or a
voice name (auto-downloaded on first use via 'python -m
piper.download_voices --download-dir <cache>')
- Voice cache at ~/.hermes/cache/piper-voices/ (profile-aware via
get_hermes_dir)
- Optional SynthesisConfig knobs: length_scale, noise_scale,
noise_w_scale, volume, normalize_audio, use_cuda — passed through
only when configured, so older piper-tts versions aren't broken
- WAV output then ffmpeg conversion path (same as neutts/kittentts) so
Telegram voice bubbles work when ffmpeg is present
- Piper added to BUILTIN_TTS_PROVIDERS so a user's
tts.providers.piper.command cannot shadow the native provider
(regression test included)
- 'hermes tools' wizard entry
- Piper appears under Voice and TTS as local free, with
'pip install piper-tts' auto-install via post_setup handler
- Prints voice-catalog URL and default-voice info after install
- config.yaml defaults
- tts.piper.voice defaults to en_US-lessac-medium
- Commented advanced knobs for discoverability
- Docs
- New 'Piper (local, 44 languages)' section in features/tts.md
explaining install path, voice switching, pre-downloaded voices,
and advanced knobs
- Piper listed in the ten-provider table and ffmpeg table
- Custom-command-providers section updated to drop the Piper example
(now native) and add a piper-custom example for users with their own
trained .onnx models
- overview.md bumps provider count to ten
- Tests (tests/tools/test_tts_piper.py, 16 tests)
- Registration (BUILTIN_TTS_PROVIDERS, PROVIDER_MAX_TEXT_LENGTH)
- _resolve_piper_voice_path across every branch: direct .onnx path,
cached voice name, fresh download with correct CLI args, download
failure, successful-exit-but-missing-files, empty voice to default
- _generate_piper_tts: loads voice once, reuses cache, voice-name
download wiring, advanced knobs flow through SynthesisConfig
- text_to_speech_tool end-to-end dispatch and missing-package error
- check_tts_requirements: piper availability toggles the return value
- Regression guard: piper cannot be shadowed by a command provider
with the same name
- Pre-existing test_tts_mistral test broadened to mock the new
piper/kittentts/command-provider checks (otherwise it false-passes
when piper is installed in the test venv)
E2E verification (live):
Actual pip install piper-tts, config piper + en_US-lessac-low,
text_to_speech_tool call, voice auto-downloaded from HuggingFace,
WAV synthesized, ffmpeg-converted to Ogg/Opus. Second call hits the
cache (~60ms). Cache dir populated with .onnx and .onnx.json.
This caught a real bug during development: the first pass used '-d' as
the download-dir flag; the actual piper.download_voices CLI wants
'--download-dir'. Fixed before PR opened.
2026-04-30 02:53:20 -07:00
|
|
|
"piper-cli": {"type": "command", "command": "piper-cli foo"},
|
feat(tts): add command-type provider registry under tts.providers.<name> (#17843)
Reshape of PR #17211 (@versun). Lets users wire any local or external
TTS CLI into Hermes without adding engine-specific Python code. Users
declare any number of named providers in config.yaml and switch between
them with tts.provider: <name>, alongside the built-ins (edge, openai,
elevenlabs, …).
Config shape:
tts:
provider: piper-en
providers:
piper-en:
type: command
command: 'piper -m ~/model.onnx -f {output_path} < {input_path}'
output_format: wav
Placeholders: {input_path}, {text_path}, {output_path}, {format},
{voice}, {model}, {speed}. Use {{ / }} for literal braces.
Key behavior:
- Built-in provider names always win — a tts.providers.openai entry
cannot shadow the native OpenAI provider.
- type: command is the default when command: is set.
- Placeholder values are shell-quote-aware (bare / single / double
context), so paths with spaces and shell metacharacters are safe.
- Default delivery is a regular audio attachment. voice_compatible: true
opts in to Telegram voice-bubble delivery via ffmpeg Opus conversion.
- Command failures (non-zero exit, timeout, empty output) surface to
the agent with stderr/stdout included so you can debug from chat.
- Process-tree kill on timeout (Unix killpg, Windows taskkill /T).
- max_text_length defaults to 5000 for command providers; override
under tts.providers.<name>.max_text_length.
Tests: tests/tools/test_tts_command_providers.py — 42 new tests cover
provider resolution, shell-quote context, placeholder rendering with
injection payloads, timeout, non-zero exit, empty output, voice_compatible
opt-in, and end-to-end dispatch through text_to_speech_tool. All 88
pre-existing TTS tests still pass.
Docs: new "Custom command providers" section in
website/docs/user-guide/features/tts.md with three worked examples
(Piper, VoxCPM, MLX-Kokoro), placeholder reference, optional keys,
behavior notes, and security caveat.
E2E-verified live: isolated HERMES_HOME, command provider declared in
config.yaml, text_to_speech_tool dispatches through the registered
shell command and the output file is produced as expected.
Co-authored-by: Versun <me+github7604@versun.org>
2026-04-30 02:29:08 -07:00
|
|
|
},
|
|
|
|
|
}
|
feat(tts): add Piper as a native local TTS provider (closes #8508) (#17885)
Piper (OHF-Voice/piper1-gpl) is a fast, local neural TTS engine from the
Home Assistant project that supports 44 languages with zero API keys.
Adds it as a native built-in provider alongside edge/neutts/kittentts,
installable via 'hermes tools' with one keystroke.
What ships:
- New 'piper' built-in provider in tools/tts_tool.py
- Lazy import via _import_piper()
- Module-level voice cache keyed on (model_path, use_cuda) so switching
voices doesn't invalidate older cached voices
- _resolve_piper_voice_path() accepts either an absolute .onnx path or a
voice name (auto-downloaded on first use via 'python -m
piper.download_voices --download-dir <cache>')
- Voice cache at ~/.hermes/cache/piper-voices/ (profile-aware via
get_hermes_dir)
- Optional SynthesisConfig knobs: length_scale, noise_scale,
noise_w_scale, volume, normalize_audio, use_cuda — passed through
only when configured, so older piper-tts versions aren't broken
- WAV output then ffmpeg conversion path (same as neutts/kittentts) so
Telegram voice bubbles work when ffmpeg is present
- Piper added to BUILTIN_TTS_PROVIDERS so a user's
tts.providers.piper.command cannot shadow the native provider
(regression test included)
- 'hermes tools' wizard entry
- Piper appears under Voice and TTS as local free, with
'pip install piper-tts' auto-install via post_setup handler
- Prints voice-catalog URL and default-voice info after install
- config.yaml defaults
- tts.piper.voice defaults to en_US-lessac-medium
- Commented advanced knobs for discoverability
- Docs
- New 'Piper (local, 44 languages)' section in features/tts.md
explaining install path, voice switching, pre-downloaded voices,
and advanced knobs
- Piper listed in the ten-provider table and ffmpeg table
- Custom-command-providers section updated to drop the Piper example
(now native) and add a piper-custom example for users with their own
trained .onnx models
- overview.md bumps provider count to ten
- Tests (tests/tools/test_tts_piper.py, 16 tests)
- Registration (BUILTIN_TTS_PROVIDERS, PROVIDER_MAX_TEXT_LENGTH)
- _resolve_piper_voice_path across every branch: direct .onnx path,
cached voice name, fresh download with correct CLI args, download
failure, successful-exit-but-missing-files, empty voice to default
- _generate_piper_tts: loads voice once, reuses cache, voice-name
download wiring, advanced knobs flow through SynthesisConfig
- text_to_speech_tool end-to-end dispatch and missing-package error
- check_tts_requirements: piper availability toggles the return value
- Regression guard: piper cannot be shadowed by a command provider
with the same name
- Pre-existing test_tts_mistral test broadened to mock the new
piper/kittentts/command-provider checks (otherwise it false-passes
when piper is installed in the test venv)
E2E verification (live):
Actual pip install piper-tts, config piper + en_US-lessac-low,
text_to_speech_tool call, voice auto-downloaded from HuggingFace,
WAV synthesized, ffmpeg-converted to Ogg/Opus. Second call hits the
cache (~60ms). Cache dir populated with .onnx and .onnx.json.
This caught a real bug during development: the first pass used '-d' as
the download-dir flag; the actual piper.download_voices CLI wants
'--download-dir'. Fixed before PR opened.
2026-04-30 02:53:20 -07:00
|
|
|
resolved = _resolve_command_provider_config("piper-cli", cfg)
|
feat(tts): add command-type provider registry under tts.providers.<name> (#17843)
Reshape of PR #17211 (@versun). Lets users wire any local or external
TTS CLI into Hermes without adding engine-specific Python code. Users
declare any number of named providers in config.yaml and switch between
them with tts.provider: <name>, alongside the built-ins (edge, openai,
elevenlabs, …).
Config shape:
tts:
provider: piper-en
providers:
piper-en:
type: command
command: 'piper -m ~/model.onnx -f {output_path} < {input_path}'
output_format: wav
Placeholders: {input_path}, {text_path}, {output_path}, {format},
{voice}, {model}, {speed}. Use {{ / }} for literal braces.
Key behavior:
- Built-in provider names always win — a tts.providers.openai entry
cannot shadow the native OpenAI provider.
- type: command is the default when command: is set.
- Placeholder values are shell-quote-aware (bare / single / double
context), so paths with spaces and shell metacharacters are safe.
- Default delivery is a regular audio attachment. voice_compatible: true
opts in to Telegram voice-bubble delivery via ffmpeg Opus conversion.
- Command failures (non-zero exit, timeout, empty output) surface to
the agent with stderr/stdout included so you can debug from chat.
- Process-tree kill on timeout (Unix killpg, Windows taskkill /T).
- max_text_length defaults to 5000 for command providers; override
under tts.providers.<name>.max_text_length.
Tests: tests/tools/test_tts_command_providers.py — 42 new tests cover
provider resolution, shell-quote context, placeholder rendering with
injection payloads, timeout, non-zero exit, empty output, voice_compatible
opt-in, and end-to-end dispatch through text_to_speech_tool. All 88
pre-existing TTS tests still pass.
Docs: new "Custom command providers" section in
website/docs/user-guide/features/tts.md with three worked examples
(Piper, VoxCPM, MLX-Kokoro), placeholder reference, optional keys,
behavior notes, and security caveat.
E2E-verified live: isolated HERMES_HOME, command provider declared in
config.yaml, text_to_speech_tool dispatches through the registered
shell command and the output file is produced as expected.
Co-authored-by: Versun <me+github7604@versun.org>
2026-04-30 02:29:08 -07:00
|
|
|
assert resolved is not None
|
feat(tts): add Piper as a native local TTS provider (closes #8508) (#17885)
Piper (OHF-Voice/piper1-gpl) is a fast, local neural TTS engine from the
Home Assistant project that supports 44 languages with zero API keys.
Adds it as a native built-in provider alongside edge/neutts/kittentts,
installable via 'hermes tools' with one keystroke.
What ships:
- New 'piper' built-in provider in tools/tts_tool.py
- Lazy import via _import_piper()
- Module-level voice cache keyed on (model_path, use_cuda) so switching
voices doesn't invalidate older cached voices
- _resolve_piper_voice_path() accepts either an absolute .onnx path or a
voice name (auto-downloaded on first use via 'python -m
piper.download_voices --download-dir <cache>')
- Voice cache at ~/.hermes/cache/piper-voices/ (profile-aware via
get_hermes_dir)
- Optional SynthesisConfig knobs: length_scale, noise_scale,
noise_w_scale, volume, normalize_audio, use_cuda — passed through
only when configured, so older piper-tts versions aren't broken
- WAV output then ffmpeg conversion path (same as neutts/kittentts) so
Telegram voice bubbles work when ffmpeg is present
- Piper added to BUILTIN_TTS_PROVIDERS so a user's
tts.providers.piper.command cannot shadow the native provider
(regression test included)
- 'hermes tools' wizard entry
- Piper appears under Voice and TTS as local free, with
'pip install piper-tts' auto-install via post_setup handler
- Prints voice-catalog URL and default-voice info after install
- config.yaml defaults
- tts.piper.voice defaults to en_US-lessac-medium
- Commented advanced knobs for discoverability
- Docs
- New 'Piper (local, 44 languages)' section in features/tts.md
explaining install path, voice switching, pre-downloaded voices,
and advanced knobs
- Piper listed in the ten-provider table and ffmpeg table
- Custom-command-providers section updated to drop the Piper example
(now native) and add a piper-custom example for users with their own
trained .onnx models
- overview.md bumps provider count to ten
- Tests (tests/tools/test_tts_piper.py, 16 tests)
- Registration (BUILTIN_TTS_PROVIDERS, PROVIDER_MAX_TEXT_LENGTH)
- _resolve_piper_voice_path across every branch: direct .onnx path,
cached voice name, fresh download with correct CLI args, download
failure, successful-exit-but-missing-files, empty voice to default
- _generate_piper_tts: loads voice once, reuses cache, voice-name
download wiring, advanced knobs flow through SynthesisConfig
- text_to_speech_tool end-to-end dispatch and missing-package error
- check_tts_requirements: piper availability toggles the return value
- Regression guard: piper cannot be shadowed by a command provider
with the same name
- Pre-existing test_tts_mistral test broadened to mock the new
piper/kittentts/command-provider checks (otherwise it false-passes
when piper is installed in the test venv)
E2E verification (live):
Actual pip install piper-tts, config piper + en_US-lessac-low,
text_to_speech_tool call, voice auto-downloaded from HuggingFace,
WAV synthesized, ffmpeg-converted to Ogg/Opus. Second call hits the
cache (~60ms). Cache dir populated with .onnx and .onnx.json.
This caught a real bug during development: the first pass used '-d' as
the download-dir flag; the actual piper.download_voices CLI wants
'--download-dir'. Fixed before PR opened.
2026-04-30 02:53:20 -07:00
|
|
|
assert resolved["command"] == "piper-cli foo"
|
feat(tts): add command-type provider registry under tts.providers.<name> (#17843)
Reshape of PR #17211 (@versun). Lets users wire any local or external
TTS CLI into Hermes without adding engine-specific Python code. Users
declare any number of named providers in config.yaml and switch between
them with tts.provider: <name>, alongside the built-ins (edge, openai,
elevenlabs, …).
Config shape:
tts:
provider: piper-en
providers:
piper-en:
type: command
command: 'piper -m ~/model.onnx -f {output_path} < {input_path}'
output_format: wav
Placeholders: {input_path}, {text_path}, {output_path}, {format},
{voice}, {model}, {speed}. Use {{ / }} for literal braces.
Key behavior:
- Built-in provider names always win — a tts.providers.openai entry
cannot shadow the native OpenAI provider.
- type: command is the default when command: is set.
- Placeholder values are shell-quote-aware (bare / single / double
context), so paths with spaces and shell metacharacters are safe.
- Default delivery is a regular audio attachment. voice_compatible: true
opts in to Telegram voice-bubble delivery via ffmpeg Opus conversion.
- Command failures (non-zero exit, timeout, empty output) surface to
the agent with stderr/stdout included so you can debug from chat.
- Process-tree kill on timeout (Unix killpg, Windows taskkill /T).
- max_text_length defaults to 5000 for command providers; override
under tts.providers.<name>.max_text_length.
Tests: tests/tools/test_tts_command_providers.py — 42 new tests cover
provider resolution, shell-quote context, placeholder rendering with
injection payloads, timeout, non-zero exit, empty output, voice_compatible
opt-in, and end-to-end dispatch through text_to_speech_tool. All 88
pre-existing TTS tests still pass.
Docs: new "Custom command providers" section in
website/docs/user-guide/features/tts.md with three worked examples
(Piper, VoxCPM, MLX-Kokoro), placeholder reference, optional keys,
behavior notes, and security caveat.
E2E-verified live: isolated HERMES_HOME, command provider declared in
config.yaml, text_to_speech_tool dispatches through the registered
shell command and the output file is produced as expected.
Co-authored-by: Versun <me+github7604@versun.org>
2026-04-30 02:29:08 -07:00
|
|
|
|
|
|
|
|
def test_type_command_is_implied_when_command_is_set(self):
|
feat(tts): add Piper as a native local TTS provider (closes #8508) (#17885)
Piper (OHF-Voice/piper1-gpl) is a fast, local neural TTS engine from the
Home Assistant project that supports 44 languages with zero API keys.
Adds it as a native built-in provider alongside edge/neutts/kittentts,
installable via 'hermes tools' with one keystroke.
What ships:
- New 'piper' built-in provider in tools/tts_tool.py
- Lazy import via _import_piper()
- Module-level voice cache keyed on (model_path, use_cuda) so switching
voices doesn't invalidate older cached voices
- _resolve_piper_voice_path() accepts either an absolute .onnx path or a
voice name (auto-downloaded on first use via 'python -m
piper.download_voices --download-dir <cache>')
- Voice cache at ~/.hermes/cache/piper-voices/ (profile-aware via
get_hermes_dir)
- Optional SynthesisConfig knobs: length_scale, noise_scale,
noise_w_scale, volume, normalize_audio, use_cuda — passed through
only when configured, so older piper-tts versions aren't broken
- WAV output then ffmpeg conversion path (same as neutts/kittentts) so
Telegram voice bubbles work when ffmpeg is present
- Piper added to BUILTIN_TTS_PROVIDERS so a user's
tts.providers.piper.command cannot shadow the native provider
(regression test included)
- 'hermes tools' wizard entry
- Piper appears under Voice and TTS as local free, with
'pip install piper-tts' auto-install via post_setup handler
- Prints voice-catalog URL and default-voice info after install
- config.yaml defaults
- tts.piper.voice defaults to en_US-lessac-medium
- Commented advanced knobs for discoverability
- Docs
- New 'Piper (local, 44 languages)' section in features/tts.md
explaining install path, voice switching, pre-downloaded voices,
and advanced knobs
- Piper listed in the ten-provider table and ffmpeg table
- Custom-command-providers section updated to drop the Piper example
(now native) and add a piper-custom example for users with their own
trained .onnx models
- overview.md bumps provider count to ten
- Tests (tests/tools/test_tts_piper.py, 16 tests)
- Registration (BUILTIN_TTS_PROVIDERS, PROVIDER_MAX_TEXT_LENGTH)
- _resolve_piper_voice_path across every branch: direct .onnx path,
cached voice name, fresh download with correct CLI args, download
failure, successful-exit-but-missing-files, empty voice to default
- _generate_piper_tts: loads voice once, reuses cache, voice-name
download wiring, advanced knobs flow through SynthesisConfig
- text_to_speech_tool end-to-end dispatch and missing-package error
- check_tts_requirements: piper availability toggles the return value
- Regression guard: piper cannot be shadowed by a command provider
with the same name
- Pre-existing test_tts_mistral test broadened to mock the new
piper/kittentts/command-provider checks (otherwise it false-passes
when piper is installed in the test venv)
E2E verification (live):
Actual pip install piper-tts, config piper + en_US-lessac-low,
text_to_speech_tool call, voice auto-downloaded from HuggingFace,
WAV synthesized, ffmpeg-converted to Ogg/Opus. Second call hits the
cache (~60ms). Cache dir populated with .onnx and .onnx.json.
This caught a real bug during development: the first pass used '-d' as
the download-dir flag; the actual piper.download_voices CLI wants
'--download-dir'. Fixed before PR opened.
2026-04-30 02:53:20 -07:00
|
|
|
cfg = {"providers": {"piper-cli": {"command": "piper-cli foo"}}}
|
|
|
|
|
resolved = _resolve_command_provider_config("piper-cli", cfg)
|
feat(tts): add command-type provider registry under tts.providers.<name> (#17843)
Reshape of PR #17211 (@versun). Lets users wire any local or external
TTS CLI into Hermes without adding engine-specific Python code. Users
declare any number of named providers in config.yaml and switch between
them with tts.provider: <name>, alongside the built-ins (edge, openai,
elevenlabs, …).
Config shape:
tts:
provider: piper-en
providers:
piper-en:
type: command
command: 'piper -m ~/model.onnx -f {output_path} < {input_path}'
output_format: wav
Placeholders: {input_path}, {text_path}, {output_path}, {format},
{voice}, {model}, {speed}. Use {{ / }} for literal braces.
Key behavior:
- Built-in provider names always win — a tts.providers.openai entry
cannot shadow the native OpenAI provider.
- type: command is the default when command: is set.
- Placeholder values are shell-quote-aware (bare / single / double
context), so paths with spaces and shell metacharacters are safe.
- Default delivery is a regular audio attachment. voice_compatible: true
opts in to Telegram voice-bubble delivery via ffmpeg Opus conversion.
- Command failures (non-zero exit, timeout, empty output) surface to
the agent with stderr/stdout included so you can debug from chat.
- Process-tree kill on timeout (Unix killpg, Windows taskkill /T).
- max_text_length defaults to 5000 for command providers; override
under tts.providers.<name>.max_text_length.
Tests: tests/tools/test_tts_command_providers.py — 42 new tests cover
provider resolution, shell-quote context, placeholder rendering with
injection payloads, timeout, non-zero exit, empty output, voice_compatible
opt-in, and end-to-end dispatch through text_to_speech_tool. All 88
pre-existing TTS tests still pass.
Docs: new "Custom command providers" section in
website/docs/user-guide/features/tts.md with three worked examples
(Piper, VoxCPM, MLX-Kokoro), placeholder reference, optional keys,
behavior notes, and security caveat.
E2E-verified live: isolated HERMES_HOME, command provider declared in
config.yaml, text_to_speech_tool dispatches through the registered
shell command and the output file is produced as expected.
Co-authored-by: Versun <me+github7604@versun.org>
2026-04-30 02:29:08 -07:00
|
|
|
assert resolved is not None
|
|
|
|
|
|
|
|
|
|
def test_other_type_values_reject(self):
|
feat(tts): add Piper as a native local TTS provider (closes #8508) (#17885)
Piper (OHF-Voice/piper1-gpl) is a fast, local neural TTS engine from the
Home Assistant project that supports 44 languages with zero API keys.
Adds it as a native built-in provider alongside edge/neutts/kittentts,
installable via 'hermes tools' with one keystroke.
What ships:
- New 'piper' built-in provider in tools/tts_tool.py
- Lazy import via _import_piper()
- Module-level voice cache keyed on (model_path, use_cuda) so switching
voices doesn't invalidate older cached voices
- _resolve_piper_voice_path() accepts either an absolute .onnx path or a
voice name (auto-downloaded on first use via 'python -m
piper.download_voices --download-dir <cache>')
- Voice cache at ~/.hermes/cache/piper-voices/ (profile-aware via
get_hermes_dir)
- Optional SynthesisConfig knobs: length_scale, noise_scale,
noise_w_scale, volume, normalize_audio, use_cuda — passed through
only when configured, so older piper-tts versions aren't broken
- WAV output then ffmpeg conversion path (same as neutts/kittentts) so
Telegram voice bubbles work when ffmpeg is present
- Piper added to BUILTIN_TTS_PROVIDERS so a user's
tts.providers.piper.command cannot shadow the native provider
(regression test included)
- 'hermes tools' wizard entry
- Piper appears under Voice and TTS as local free, with
'pip install piper-tts' auto-install via post_setup handler
- Prints voice-catalog URL and default-voice info after install
- config.yaml defaults
- tts.piper.voice defaults to en_US-lessac-medium
- Commented advanced knobs for discoverability
- Docs
- New 'Piper (local, 44 languages)' section in features/tts.md
explaining install path, voice switching, pre-downloaded voices,
and advanced knobs
- Piper listed in the ten-provider table and ffmpeg table
- Custom-command-providers section updated to drop the Piper example
(now native) and add a piper-custom example for users with their own
trained .onnx models
- overview.md bumps provider count to ten
- Tests (tests/tools/test_tts_piper.py, 16 tests)
- Registration (BUILTIN_TTS_PROVIDERS, PROVIDER_MAX_TEXT_LENGTH)
- _resolve_piper_voice_path across every branch: direct .onnx path,
cached voice name, fresh download with correct CLI args, download
failure, successful-exit-but-missing-files, empty voice to default
- _generate_piper_tts: loads voice once, reuses cache, voice-name
download wiring, advanced knobs flow through SynthesisConfig
- text_to_speech_tool end-to-end dispatch and missing-package error
- check_tts_requirements: piper availability toggles the return value
- Regression guard: piper cannot be shadowed by a command provider
with the same name
- Pre-existing test_tts_mistral test broadened to mock the new
piper/kittentts/command-provider checks (otherwise it false-passes
when piper is installed in the test venv)
E2E verification (live):
Actual pip install piper-tts, config piper + en_US-lessac-low,
text_to_speech_tool call, voice auto-downloaded from HuggingFace,
WAV synthesized, ffmpeg-converted to Ogg/Opus. Second call hits the
cache (~60ms). Cache dir populated with .onnx and .onnx.json.
This caught a real bug during development: the first pass used '-d' as
the download-dir flag; the actual piper.download_voices CLI wants
'--download-dir'. Fixed before PR opened.
2026-04-30 02:53:20 -07:00
|
|
|
cfg = {"providers": {"piper-cli": {"type": "python", "command": "piper-cli foo"}}}
|
|
|
|
|
assert _resolve_command_provider_config("piper-cli", cfg) is None
|
feat(tts): add command-type provider registry under tts.providers.<name> (#17843)
Reshape of PR #17211 (@versun). Lets users wire any local or external
TTS CLI into Hermes without adding engine-specific Python code. Users
declare any number of named providers in config.yaml and switch between
them with tts.provider: <name>, alongside the built-ins (edge, openai,
elevenlabs, …).
Config shape:
tts:
provider: piper-en
providers:
piper-en:
type: command
command: 'piper -m ~/model.onnx -f {output_path} < {input_path}'
output_format: wav
Placeholders: {input_path}, {text_path}, {output_path}, {format},
{voice}, {model}, {speed}. Use {{ / }} for literal braces.
Key behavior:
- Built-in provider names always win — a tts.providers.openai entry
cannot shadow the native OpenAI provider.
- type: command is the default when command: is set.
- Placeholder values are shell-quote-aware (bare / single / double
context), so paths with spaces and shell metacharacters are safe.
- Default delivery is a regular audio attachment. voice_compatible: true
opts in to Telegram voice-bubble delivery via ffmpeg Opus conversion.
- Command failures (non-zero exit, timeout, empty output) surface to
the agent with stderr/stdout included so you can debug from chat.
- Process-tree kill on timeout (Unix killpg, Windows taskkill /T).
- max_text_length defaults to 5000 for command providers; override
under tts.providers.<name>.max_text_length.
Tests: tests/tools/test_tts_command_providers.py — 42 new tests cover
provider resolution, shell-quote context, placeholder rendering with
injection payloads, timeout, non-zero exit, empty output, voice_compatible
opt-in, and end-to-end dispatch through text_to_speech_tool. All 88
pre-existing TTS tests still pass.
Docs: new "Custom command providers" section in
website/docs/user-guide/features/tts.md with three worked examples
(Piper, VoxCPM, MLX-Kokoro), placeholder reference, optional keys,
behavior notes, and security caveat.
E2E-verified live: isolated HERMES_HOME, command provider declared in
config.yaml, text_to_speech_tool dispatches through the registered
shell command and the output file is produced as expected.
Co-authored-by: Versun <me+github7604@versun.org>
2026-04-30 02:29:08 -07:00
|
|
|
|
|
|
|
|
def test_empty_command_rejects(self):
|
feat(tts): add Piper as a native local TTS provider (closes #8508) (#17885)
Piper (OHF-Voice/piper1-gpl) is a fast, local neural TTS engine from the
Home Assistant project that supports 44 languages with zero API keys.
Adds it as a native built-in provider alongside edge/neutts/kittentts,
installable via 'hermes tools' with one keystroke.
What ships:
- New 'piper' built-in provider in tools/tts_tool.py
- Lazy import via _import_piper()
- Module-level voice cache keyed on (model_path, use_cuda) so switching
voices doesn't invalidate older cached voices
- _resolve_piper_voice_path() accepts either an absolute .onnx path or a
voice name (auto-downloaded on first use via 'python -m
piper.download_voices --download-dir <cache>')
- Voice cache at ~/.hermes/cache/piper-voices/ (profile-aware via
get_hermes_dir)
- Optional SynthesisConfig knobs: length_scale, noise_scale,
noise_w_scale, volume, normalize_audio, use_cuda — passed through
only when configured, so older piper-tts versions aren't broken
- WAV output then ffmpeg conversion path (same as neutts/kittentts) so
Telegram voice bubbles work when ffmpeg is present
- Piper added to BUILTIN_TTS_PROVIDERS so a user's
tts.providers.piper.command cannot shadow the native provider
(regression test included)
- 'hermes tools' wizard entry
- Piper appears under Voice and TTS as local free, with
'pip install piper-tts' auto-install via post_setup handler
- Prints voice-catalog URL and default-voice info after install
- config.yaml defaults
- tts.piper.voice defaults to en_US-lessac-medium
- Commented advanced knobs for discoverability
- Docs
- New 'Piper (local, 44 languages)' section in features/tts.md
explaining install path, voice switching, pre-downloaded voices,
and advanced knobs
- Piper listed in the ten-provider table and ffmpeg table
- Custom-command-providers section updated to drop the Piper example
(now native) and add a piper-custom example for users with their own
trained .onnx models
- overview.md bumps provider count to ten
- Tests (tests/tools/test_tts_piper.py, 16 tests)
- Registration (BUILTIN_TTS_PROVIDERS, PROVIDER_MAX_TEXT_LENGTH)
- _resolve_piper_voice_path across every branch: direct .onnx path,
cached voice name, fresh download with correct CLI args, download
failure, successful-exit-but-missing-files, empty voice to default
- _generate_piper_tts: loads voice once, reuses cache, voice-name
download wiring, advanced knobs flow through SynthesisConfig
- text_to_speech_tool end-to-end dispatch and missing-package error
- check_tts_requirements: piper availability toggles the return value
- Regression guard: piper cannot be shadowed by a command provider
with the same name
- Pre-existing test_tts_mistral test broadened to mock the new
piper/kittentts/command-provider checks (otherwise it false-passes
when piper is installed in the test venv)
E2E verification (live):
Actual pip install piper-tts, config piper + en_US-lessac-low,
text_to_speech_tool call, voice auto-downloaded from HuggingFace,
WAV synthesized, ffmpeg-converted to Ogg/Opus. Second call hits the
cache (~60ms). Cache dir populated with .onnx and .onnx.json.
This caught a real bug during development: the first pass used '-d' as
the download-dir flag; the actual piper.download_voices CLI wants
'--download-dir'. Fixed before PR opened.
2026-04-30 02:53:20 -07:00
|
|
|
cfg = {"providers": {"piper-cli": {"type": "command", "command": " "}}}
|
|
|
|
|
assert _resolve_command_provider_config("piper-cli", cfg) is None
|
feat(tts): add command-type provider registry under tts.providers.<name> (#17843)
Reshape of PR #17211 (@versun). Lets users wire any local or external
TTS CLI into Hermes without adding engine-specific Python code. Users
declare any number of named providers in config.yaml and switch between
them with tts.provider: <name>, alongside the built-ins (edge, openai,
elevenlabs, …).
Config shape:
tts:
provider: piper-en
providers:
piper-en:
type: command
command: 'piper -m ~/model.onnx -f {output_path} < {input_path}'
output_format: wav
Placeholders: {input_path}, {text_path}, {output_path}, {format},
{voice}, {model}, {speed}. Use {{ / }} for literal braces.
Key behavior:
- Built-in provider names always win — a tts.providers.openai entry
cannot shadow the native OpenAI provider.
- type: command is the default when command: is set.
- Placeholder values are shell-quote-aware (bare / single / double
context), so paths with spaces and shell metacharacters are safe.
- Default delivery is a regular audio attachment. voice_compatible: true
opts in to Telegram voice-bubble delivery via ffmpeg Opus conversion.
- Command failures (non-zero exit, timeout, empty output) surface to
the agent with stderr/stdout included so you can debug from chat.
- Process-tree kill on timeout (Unix killpg, Windows taskkill /T).
- max_text_length defaults to 5000 for command providers; override
under tts.providers.<name>.max_text_length.
Tests: tests/tools/test_tts_command_providers.py — 42 new tests cover
provider resolution, shell-quote context, placeholder rendering with
injection payloads, timeout, non-zero exit, empty output, voice_compatible
opt-in, and end-to-end dispatch through text_to_speech_tool. All 88
pre-existing TTS tests still pass.
Docs: new "Custom command providers" section in
website/docs/user-guide/features/tts.md with three worked examples
(Piper, VoxCPM, MLX-Kokoro), placeholder reference, optional keys,
behavior notes, and security caveat.
E2E-verified live: isolated HERMES_HOME, command provider declared in
config.yaml, text_to_speech_tool dispatches through the registered
shell command and the output file is produced as expected.
Co-authored-by: Versun <me+github7604@versun.org>
2026-04-30 02:29:08 -07:00
|
|
|
|
|
|
|
|
def test_case_insensitive_lookup(self):
|
feat(tts): add Piper as a native local TTS provider (closes #8508) (#17885)
Piper (OHF-Voice/piper1-gpl) is a fast, local neural TTS engine from the
Home Assistant project that supports 44 languages with zero API keys.
Adds it as a native built-in provider alongside edge/neutts/kittentts,
installable via 'hermes tools' with one keystroke.
What ships:
- New 'piper' built-in provider in tools/tts_tool.py
- Lazy import via _import_piper()
- Module-level voice cache keyed on (model_path, use_cuda) so switching
voices doesn't invalidate older cached voices
- _resolve_piper_voice_path() accepts either an absolute .onnx path or a
voice name (auto-downloaded on first use via 'python -m
piper.download_voices --download-dir <cache>')
- Voice cache at ~/.hermes/cache/piper-voices/ (profile-aware via
get_hermes_dir)
- Optional SynthesisConfig knobs: length_scale, noise_scale,
noise_w_scale, volume, normalize_audio, use_cuda — passed through
only when configured, so older piper-tts versions aren't broken
- WAV output then ffmpeg conversion path (same as neutts/kittentts) so
Telegram voice bubbles work when ffmpeg is present
- Piper added to BUILTIN_TTS_PROVIDERS so a user's
tts.providers.piper.command cannot shadow the native provider
(regression test included)
- 'hermes tools' wizard entry
- Piper appears under Voice and TTS as local free, with
'pip install piper-tts' auto-install via post_setup handler
- Prints voice-catalog URL and default-voice info after install
- config.yaml defaults
- tts.piper.voice defaults to en_US-lessac-medium
- Commented advanced knobs for discoverability
- Docs
- New 'Piper (local, 44 languages)' section in features/tts.md
explaining install path, voice switching, pre-downloaded voices,
and advanced knobs
- Piper listed in the ten-provider table and ffmpeg table
- Custom-command-providers section updated to drop the Piper example
(now native) and add a piper-custom example for users with their own
trained .onnx models
- overview.md bumps provider count to ten
- Tests (tests/tools/test_tts_piper.py, 16 tests)
- Registration (BUILTIN_TTS_PROVIDERS, PROVIDER_MAX_TEXT_LENGTH)
- _resolve_piper_voice_path across every branch: direct .onnx path,
cached voice name, fresh download with correct CLI args, download
failure, successful-exit-but-missing-files, empty voice to default
- _generate_piper_tts: loads voice once, reuses cache, voice-name
download wiring, advanced knobs flow through SynthesisConfig
- text_to_speech_tool end-to-end dispatch and missing-package error
- check_tts_requirements: piper availability toggles the return value
- Regression guard: piper cannot be shadowed by a command provider
with the same name
- Pre-existing test_tts_mistral test broadened to mock the new
piper/kittentts/command-provider checks (otherwise it false-passes
when piper is installed in the test venv)
E2E verification (live):
Actual pip install piper-tts, config piper + en_US-lessac-low,
text_to_speech_tool call, voice auto-downloaded from HuggingFace,
WAV synthesized, ffmpeg-converted to Ogg/Opus. Second call hits the
cache (~60ms). Cache dir populated with .onnx and .onnx.json.
This caught a real bug during development: the first pass used '-d' as
the download-dir flag; the actual piper.download_voices CLI wants
'--download-dir'. Fixed before PR opened.
2026-04-30 02:53:20 -07:00
|
|
|
cfg = {"providers": {"piper-cli": {"type": "command", "command": "x"}}}
|
|
|
|
|
assert _resolve_command_provider_config("PIPER-CLI", cfg) is not None
|
|
|
|
|
|
|
|
|
|
def test_native_piper_cannot_be_shadowed_by_command_entry(self):
|
|
|
|
|
"""Regression guard for PR that added native Piper as a built-in.
|
|
|
|
|
A user's ``tts.providers.piper`` must not override the built-in."""
|
|
|
|
|
cfg = {
|
|
|
|
|
"providers": {
|
|
|
|
|
"piper": {"type": "command", "command": "some-script"},
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
assert _resolve_command_provider_config("piper", cfg) is None
|
feat(tts): add command-type provider registry under tts.providers.<name> (#17843)
Reshape of PR #17211 (@versun). Lets users wire any local or external
TTS CLI into Hermes without adding engine-specific Python code. Users
declare any number of named providers in config.yaml and switch between
them with tts.provider: <name>, alongside the built-ins (edge, openai,
elevenlabs, …).
Config shape:
tts:
provider: piper-en
providers:
piper-en:
type: command
command: 'piper -m ~/model.onnx -f {output_path} < {input_path}'
output_format: wav
Placeholders: {input_path}, {text_path}, {output_path}, {format},
{voice}, {model}, {speed}. Use {{ / }} for literal braces.
Key behavior:
- Built-in provider names always win — a tts.providers.openai entry
cannot shadow the native OpenAI provider.
- type: command is the default when command: is set.
- Placeholder values are shell-quote-aware (bare / single / double
context), so paths with spaces and shell metacharacters are safe.
- Default delivery is a regular audio attachment. voice_compatible: true
opts in to Telegram voice-bubble delivery via ffmpeg Opus conversion.
- Command failures (non-zero exit, timeout, empty output) surface to
the agent with stderr/stdout included so you can debug from chat.
- Process-tree kill on timeout (Unix killpg, Windows taskkill /T).
- max_text_length defaults to 5000 for command providers; override
under tts.providers.<name>.max_text_length.
Tests: tests/tools/test_tts_command_providers.py — 42 new tests cover
provider resolution, shell-quote context, placeholder rendering with
injection payloads, timeout, non-zero exit, empty output, voice_compatible
opt-in, and end-to-end dispatch through text_to_speech_tool. All 88
pre-existing TTS tests still pass.
Docs: new "Custom command providers" section in
website/docs/user-guide/features/tts.md with three worked examples
(Piper, VoxCPM, MLX-Kokoro), placeholder reference, optional keys,
behavior notes, and security caveat.
E2E-verified live: isolated HERMES_HOME, command provider declared in
config.yaml, text_to_speech_tool dispatches through the registered
shell command and the output file is produced as expected.
Co-authored-by: Versun <me+github7604@versun.org>
2026-04-30 02:29:08 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestGetNamedProviderConfig:
|
|
|
|
|
def test_providers_block_wins(self):
|
|
|
|
|
cfg = {"providers": {"voxcpm": {"command": "new"}},
|
|
|
|
|
"voxcpm": {"command": "legacy"}}
|
|
|
|
|
assert _get_named_provider_config(cfg, "voxcpm") == {"command": "new"}
|
|
|
|
|
|
|
|
|
|
def test_legacy_tts_name_block_still_resolves(self):
|
|
|
|
|
cfg = {"voxcpm": {"type": "command", "command": "legacy"}}
|
|
|
|
|
assert _get_named_provider_config(cfg, "voxcpm") == {
|
|
|
|
|
"type": "command", "command": "legacy"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def test_builtin_names_do_not_leak_through_legacy_path(self):
|
|
|
|
|
"""``tts.openai`` must never be mistaken for a command provider."""
|
|
|
|
|
cfg = {"openai": {"command": "oops", "type": "command"}}
|
|
|
|
|
assert _get_named_provider_config(cfg, "openai") == {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestIsCommandProviderConfig:
|
|
|
|
|
def test_empty_dict_is_false(self):
|
|
|
|
|
assert _is_command_provider_config({}) is False
|
|
|
|
|
|
|
|
|
|
def test_non_dict_is_false(self):
|
|
|
|
|
assert _is_command_provider_config("foo") is False
|
|
|
|
|
assert _is_command_provider_config(None) is False
|
|
|
|
|
|
|
|
|
|
def test_type_mismatch_is_false(self):
|
|
|
|
|
assert _is_command_provider_config({"type": "native", "command": "x"}) is False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# _iter_command_providers / _has_any_command_tts_provider
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
class TestIterCommandProviders:
|
|
|
|
|
def test_iterates_only_user_command_providers(self):
|
|
|
|
|
cfg = {
|
|
|
|
|
"providers": {
|
|
|
|
|
"openai": {"type": "command", "command": "shouldnt show up"},
|
feat(tts): add Piper as a native local TTS provider (closes #8508) (#17885)
Piper (OHF-Voice/piper1-gpl) is a fast, local neural TTS engine from the
Home Assistant project that supports 44 languages with zero API keys.
Adds it as a native built-in provider alongside edge/neutts/kittentts,
installable via 'hermes tools' with one keystroke.
What ships:
- New 'piper' built-in provider in tools/tts_tool.py
- Lazy import via _import_piper()
- Module-level voice cache keyed on (model_path, use_cuda) so switching
voices doesn't invalidate older cached voices
- _resolve_piper_voice_path() accepts either an absolute .onnx path or a
voice name (auto-downloaded on first use via 'python -m
piper.download_voices --download-dir <cache>')
- Voice cache at ~/.hermes/cache/piper-voices/ (profile-aware via
get_hermes_dir)
- Optional SynthesisConfig knobs: length_scale, noise_scale,
noise_w_scale, volume, normalize_audio, use_cuda — passed through
only when configured, so older piper-tts versions aren't broken
- WAV output then ffmpeg conversion path (same as neutts/kittentts) so
Telegram voice bubbles work when ffmpeg is present
- Piper added to BUILTIN_TTS_PROVIDERS so a user's
tts.providers.piper.command cannot shadow the native provider
(regression test included)
- 'hermes tools' wizard entry
- Piper appears under Voice and TTS as local free, with
'pip install piper-tts' auto-install via post_setup handler
- Prints voice-catalog URL and default-voice info after install
- config.yaml defaults
- tts.piper.voice defaults to en_US-lessac-medium
- Commented advanced knobs for discoverability
- Docs
- New 'Piper (local, 44 languages)' section in features/tts.md
explaining install path, voice switching, pre-downloaded voices,
and advanced knobs
- Piper listed in the ten-provider table and ffmpeg table
- Custom-command-providers section updated to drop the Piper example
(now native) and add a piper-custom example for users with their own
trained .onnx models
- overview.md bumps provider count to ten
- Tests (tests/tools/test_tts_piper.py, 16 tests)
- Registration (BUILTIN_TTS_PROVIDERS, PROVIDER_MAX_TEXT_LENGTH)
- _resolve_piper_voice_path across every branch: direct .onnx path,
cached voice name, fresh download with correct CLI args, download
failure, successful-exit-but-missing-files, empty voice to default
- _generate_piper_tts: loads voice once, reuses cache, voice-name
download wiring, advanced knobs flow through SynthesisConfig
- text_to_speech_tool end-to-end dispatch and missing-package error
- check_tts_requirements: piper availability toggles the return value
- Regression guard: piper cannot be shadowed by a command provider
with the same name
- Pre-existing test_tts_mistral test broadened to mock the new
piper/kittentts/command-provider checks (otherwise it false-passes
when piper is installed in the test venv)
E2E verification (live):
Actual pip install piper-tts, config piper + en_US-lessac-low,
text_to_speech_tool call, voice auto-downloaded from HuggingFace,
WAV synthesized, ffmpeg-converted to Ogg/Opus. Second call hits the
cache (~60ms). Cache dir populated with .onnx and .onnx.json.
This caught a real bug during development: the first pass used '-d' as
the download-dir flag; the actual piper.download_voices CLI wants
'--download-dir'. Fixed before PR opened.
2026-04-30 02:53:20 -07:00
|
|
|
"piper-cli": {"type": "command", "command": "piper-cli"},
|
feat(tts): add command-type provider registry under tts.providers.<name> (#17843)
Reshape of PR #17211 (@versun). Lets users wire any local or external
TTS CLI into Hermes without adding engine-specific Python code. Users
declare any number of named providers in config.yaml and switch between
them with tts.provider: <name>, alongside the built-ins (edge, openai,
elevenlabs, …).
Config shape:
tts:
provider: piper-en
providers:
piper-en:
type: command
command: 'piper -m ~/model.onnx -f {output_path} < {input_path}'
output_format: wav
Placeholders: {input_path}, {text_path}, {output_path}, {format},
{voice}, {model}, {speed}. Use {{ / }} for literal braces.
Key behavior:
- Built-in provider names always win — a tts.providers.openai entry
cannot shadow the native OpenAI provider.
- type: command is the default when command: is set.
- Placeholder values are shell-quote-aware (bare / single / double
context), so paths with spaces and shell metacharacters are safe.
- Default delivery is a regular audio attachment. voice_compatible: true
opts in to Telegram voice-bubble delivery via ffmpeg Opus conversion.
- Command failures (non-zero exit, timeout, empty output) surface to
the agent with stderr/stdout included so you can debug from chat.
- Process-tree kill on timeout (Unix killpg, Windows taskkill /T).
- max_text_length defaults to 5000 for command providers; override
under tts.providers.<name>.max_text_length.
Tests: tests/tools/test_tts_command_providers.py — 42 new tests cover
provider resolution, shell-quote context, placeholder rendering with
injection payloads, timeout, non-zero exit, empty output, voice_compatible
opt-in, and end-to-end dispatch through text_to_speech_tool. All 88
pre-existing TTS tests still pass.
Docs: new "Custom command providers" section in
website/docs/user-guide/features/tts.md with three worked examples
(Piper, VoxCPM, MLX-Kokoro), placeholder reference, optional keys,
behavior notes, and security caveat.
E2E-verified live: isolated HERMES_HOME, command provider declared in
config.yaml, text_to_speech_tool dispatches through the registered
shell command and the output file is produced as expected.
Co-authored-by: Versun <me+github7604@versun.org>
2026-04-30 02:29:08 -07:00
|
|
|
"voxcpm": {"type": "command", "command": "voxcpm"},
|
|
|
|
|
"broken": {"type": "command", "command": ""},
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
names = sorted(name for name, _ in _iter_command_providers(cfg))
|
feat(tts): add Piper as a native local TTS provider (closes #8508) (#17885)
Piper (OHF-Voice/piper1-gpl) is a fast, local neural TTS engine from the
Home Assistant project that supports 44 languages with zero API keys.
Adds it as a native built-in provider alongside edge/neutts/kittentts,
installable via 'hermes tools' with one keystroke.
What ships:
- New 'piper' built-in provider in tools/tts_tool.py
- Lazy import via _import_piper()
- Module-level voice cache keyed on (model_path, use_cuda) so switching
voices doesn't invalidate older cached voices
- _resolve_piper_voice_path() accepts either an absolute .onnx path or a
voice name (auto-downloaded on first use via 'python -m
piper.download_voices --download-dir <cache>')
- Voice cache at ~/.hermes/cache/piper-voices/ (profile-aware via
get_hermes_dir)
- Optional SynthesisConfig knobs: length_scale, noise_scale,
noise_w_scale, volume, normalize_audio, use_cuda — passed through
only when configured, so older piper-tts versions aren't broken
- WAV output then ffmpeg conversion path (same as neutts/kittentts) so
Telegram voice bubbles work when ffmpeg is present
- Piper added to BUILTIN_TTS_PROVIDERS so a user's
tts.providers.piper.command cannot shadow the native provider
(regression test included)
- 'hermes tools' wizard entry
- Piper appears under Voice and TTS as local free, with
'pip install piper-tts' auto-install via post_setup handler
- Prints voice-catalog URL and default-voice info after install
- config.yaml defaults
- tts.piper.voice defaults to en_US-lessac-medium
- Commented advanced knobs for discoverability
- Docs
- New 'Piper (local, 44 languages)' section in features/tts.md
explaining install path, voice switching, pre-downloaded voices,
and advanced knobs
- Piper listed in the ten-provider table and ffmpeg table
- Custom-command-providers section updated to drop the Piper example
(now native) and add a piper-custom example for users with their own
trained .onnx models
- overview.md bumps provider count to ten
- Tests (tests/tools/test_tts_piper.py, 16 tests)
- Registration (BUILTIN_TTS_PROVIDERS, PROVIDER_MAX_TEXT_LENGTH)
- _resolve_piper_voice_path across every branch: direct .onnx path,
cached voice name, fresh download with correct CLI args, download
failure, successful-exit-but-missing-files, empty voice to default
- _generate_piper_tts: loads voice once, reuses cache, voice-name
download wiring, advanced knobs flow through SynthesisConfig
- text_to_speech_tool end-to-end dispatch and missing-package error
- check_tts_requirements: piper availability toggles the return value
- Regression guard: piper cannot be shadowed by a command provider
with the same name
- Pre-existing test_tts_mistral test broadened to mock the new
piper/kittentts/command-provider checks (otherwise it false-passes
when piper is installed in the test venv)
E2E verification (live):
Actual pip install piper-tts, config piper + en_US-lessac-low,
text_to_speech_tool call, voice auto-downloaded from HuggingFace,
WAV synthesized, ffmpeg-converted to Ogg/Opus. Second call hits the
cache (~60ms). Cache dir populated with .onnx and .onnx.json.
This caught a real bug during development: the first pass used '-d' as
the download-dir flag; the actual piper.download_voices CLI wants
'--download-dir'. Fixed before PR opened.
2026-04-30 02:53:20 -07:00
|
|
|
assert names == ["piper-cli", "voxcpm"]
|
feat(tts): add command-type provider registry under tts.providers.<name> (#17843)
Reshape of PR #17211 (@versun). Lets users wire any local or external
TTS CLI into Hermes without adding engine-specific Python code. Users
declare any number of named providers in config.yaml and switch between
them with tts.provider: <name>, alongside the built-ins (edge, openai,
elevenlabs, …).
Config shape:
tts:
provider: piper-en
providers:
piper-en:
type: command
command: 'piper -m ~/model.onnx -f {output_path} < {input_path}'
output_format: wav
Placeholders: {input_path}, {text_path}, {output_path}, {format},
{voice}, {model}, {speed}. Use {{ / }} for literal braces.
Key behavior:
- Built-in provider names always win — a tts.providers.openai entry
cannot shadow the native OpenAI provider.
- type: command is the default when command: is set.
- Placeholder values are shell-quote-aware (bare / single / double
context), so paths with spaces and shell metacharacters are safe.
- Default delivery is a regular audio attachment. voice_compatible: true
opts in to Telegram voice-bubble delivery via ffmpeg Opus conversion.
- Command failures (non-zero exit, timeout, empty output) surface to
the agent with stderr/stdout included so you can debug from chat.
- Process-tree kill on timeout (Unix killpg, Windows taskkill /T).
- max_text_length defaults to 5000 for command providers; override
under tts.providers.<name>.max_text_length.
Tests: tests/tools/test_tts_command_providers.py — 42 new tests cover
provider resolution, shell-quote context, placeholder rendering with
injection payloads, timeout, non-zero exit, empty output, voice_compatible
opt-in, and end-to-end dispatch through text_to_speech_tool. All 88
pre-existing TTS tests still pass.
Docs: new "Custom command providers" section in
website/docs/user-guide/features/tts.md with three worked examples
(Piper, VoxCPM, MLX-Kokoro), placeholder reference, optional keys,
behavior notes, and security caveat.
E2E-verified live: isolated HERMES_HOME, command provider declared in
config.yaml, text_to_speech_tool dispatches through the registered
shell command and the output file is produced as expected.
Co-authored-by: Versun <me+github7604@versun.org>
2026-04-30 02:29:08 -07:00
|
|
|
|
|
|
|
|
def test_has_any_command_provider_detects_declared(self):
|
feat(tts): add Piper as a native local TTS provider (closes #8508) (#17885)
Piper (OHF-Voice/piper1-gpl) is a fast, local neural TTS engine from the
Home Assistant project that supports 44 languages with zero API keys.
Adds it as a native built-in provider alongside edge/neutts/kittentts,
installable via 'hermes tools' with one keystroke.
What ships:
- New 'piper' built-in provider in tools/tts_tool.py
- Lazy import via _import_piper()
- Module-level voice cache keyed on (model_path, use_cuda) so switching
voices doesn't invalidate older cached voices
- _resolve_piper_voice_path() accepts either an absolute .onnx path or a
voice name (auto-downloaded on first use via 'python -m
piper.download_voices --download-dir <cache>')
- Voice cache at ~/.hermes/cache/piper-voices/ (profile-aware via
get_hermes_dir)
- Optional SynthesisConfig knobs: length_scale, noise_scale,
noise_w_scale, volume, normalize_audio, use_cuda — passed through
only when configured, so older piper-tts versions aren't broken
- WAV output then ffmpeg conversion path (same as neutts/kittentts) so
Telegram voice bubbles work when ffmpeg is present
- Piper added to BUILTIN_TTS_PROVIDERS so a user's
tts.providers.piper.command cannot shadow the native provider
(regression test included)
- 'hermes tools' wizard entry
- Piper appears under Voice and TTS as local free, with
'pip install piper-tts' auto-install via post_setup handler
- Prints voice-catalog URL and default-voice info after install
- config.yaml defaults
- tts.piper.voice defaults to en_US-lessac-medium
- Commented advanced knobs for discoverability
- Docs
- New 'Piper (local, 44 languages)' section in features/tts.md
explaining install path, voice switching, pre-downloaded voices,
and advanced knobs
- Piper listed in the ten-provider table and ffmpeg table
- Custom-command-providers section updated to drop the Piper example
(now native) and add a piper-custom example for users with their own
trained .onnx models
- overview.md bumps provider count to ten
- Tests (tests/tools/test_tts_piper.py, 16 tests)
- Registration (BUILTIN_TTS_PROVIDERS, PROVIDER_MAX_TEXT_LENGTH)
- _resolve_piper_voice_path across every branch: direct .onnx path,
cached voice name, fresh download with correct CLI args, download
failure, successful-exit-but-missing-files, empty voice to default
- _generate_piper_tts: loads voice once, reuses cache, voice-name
download wiring, advanced knobs flow through SynthesisConfig
- text_to_speech_tool end-to-end dispatch and missing-package error
- check_tts_requirements: piper availability toggles the return value
- Regression guard: piper cannot be shadowed by a command provider
with the same name
- Pre-existing test_tts_mistral test broadened to mock the new
piper/kittentts/command-provider checks (otherwise it false-passes
when piper is installed in the test venv)
E2E verification (live):
Actual pip install piper-tts, config piper + en_US-lessac-low,
text_to_speech_tool call, voice auto-downloaded from HuggingFace,
WAV synthesized, ffmpeg-converted to Ogg/Opus. Second call hits the
cache (~60ms). Cache dir populated with .onnx and .onnx.json.
This caught a real bug during development: the first pass used '-d' as
the download-dir flag; the actual piper.download_voices CLI wants
'--download-dir'. Fixed before PR opened.
2026-04-30 02:53:20 -07:00
|
|
|
cfg = {"providers": {"piper-cli": {"type": "command", "command": "piper-cli"}}}
|
feat(tts): add command-type provider registry under tts.providers.<name> (#17843)
Reshape of PR #17211 (@versun). Lets users wire any local or external
TTS CLI into Hermes without adding engine-specific Python code. Users
declare any number of named providers in config.yaml and switch between
them with tts.provider: <name>, alongside the built-ins (edge, openai,
elevenlabs, …).
Config shape:
tts:
provider: piper-en
providers:
piper-en:
type: command
command: 'piper -m ~/model.onnx -f {output_path} < {input_path}'
output_format: wav
Placeholders: {input_path}, {text_path}, {output_path}, {format},
{voice}, {model}, {speed}. Use {{ / }} for literal braces.
Key behavior:
- Built-in provider names always win — a tts.providers.openai entry
cannot shadow the native OpenAI provider.
- type: command is the default when command: is set.
- Placeholder values are shell-quote-aware (bare / single / double
context), so paths with spaces and shell metacharacters are safe.
- Default delivery is a regular audio attachment. voice_compatible: true
opts in to Telegram voice-bubble delivery via ffmpeg Opus conversion.
- Command failures (non-zero exit, timeout, empty output) surface to
the agent with stderr/stdout included so you can debug from chat.
- Process-tree kill on timeout (Unix killpg, Windows taskkill /T).
- max_text_length defaults to 5000 for command providers; override
under tts.providers.<name>.max_text_length.
Tests: tests/tools/test_tts_command_providers.py — 42 new tests cover
provider resolution, shell-quote context, placeholder rendering with
injection payloads, timeout, non-zero exit, empty output, voice_compatible
opt-in, and end-to-end dispatch through text_to_speech_tool. All 88
pre-existing TTS tests still pass.
Docs: new "Custom command providers" section in
website/docs/user-guide/features/tts.md with three worked examples
(Piper, VoxCPM, MLX-Kokoro), placeholder reference, optional keys,
behavior notes, and security caveat.
E2E-verified live: isolated HERMES_HOME, command provider declared in
config.yaml, text_to_speech_tool dispatches through the registered
shell command and the output file is produced as expected.
Co-authored-by: Versun <me+github7604@versun.org>
2026-04-30 02:29:08 -07:00
|
|
|
assert _has_any_command_tts_provider(cfg) is True
|
|
|
|
|
|
|
|
|
|
def test_has_any_command_provider_when_none(self):
|
|
|
|
|
assert _has_any_command_tts_provider({"providers": {}}) is False
|
|
|
|
|
assert _has_any_command_tts_provider({}) is False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# config getters
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
class TestConfigGetters:
|
|
|
|
|
def test_timeout_defaults(self):
|
|
|
|
|
assert _get_command_tts_timeout({}) == float(DEFAULT_COMMAND_TTS_TIMEOUT_SECONDS)
|
|
|
|
|
|
|
|
|
|
def test_timeout_coerces_string(self):
|
|
|
|
|
assert _get_command_tts_timeout({"timeout": "45"}) == 45.0
|
|
|
|
|
|
|
|
|
|
def test_timeout_rejects_non_positive(self):
|
|
|
|
|
assert _get_command_tts_timeout({"timeout": 0}) == float(DEFAULT_COMMAND_TTS_TIMEOUT_SECONDS)
|
|
|
|
|
assert _get_command_tts_timeout({"timeout": -1}) == float(DEFAULT_COMMAND_TTS_TIMEOUT_SECONDS)
|
|
|
|
|
|
|
|
|
|
def test_timeout_rejects_garbage(self):
|
|
|
|
|
assert _get_command_tts_timeout({"timeout": "fast"}) == float(DEFAULT_COMMAND_TTS_TIMEOUT_SECONDS)
|
|
|
|
|
|
|
|
|
|
def test_timeout_seconds_alias(self):
|
|
|
|
|
assert _get_command_tts_timeout({"timeout_seconds": 90}) == 90.0
|
|
|
|
|
|
|
|
|
|
def test_output_format_defaults(self):
|
|
|
|
|
assert _get_command_tts_output_format({}) == DEFAULT_COMMAND_TTS_OUTPUT_FORMAT
|
|
|
|
|
|
|
|
|
|
def test_output_format_path_override(self):
|
|
|
|
|
assert _get_command_tts_output_format({}, "/tmp/clip.wav") == "wav"
|
|
|
|
|
|
|
|
|
|
def test_output_format_unknown_path_falls_back_to_config(self):
|
|
|
|
|
assert _get_command_tts_output_format({"format": "ogg"}, "/tmp/clip.xyz") == "ogg"
|
|
|
|
|
|
|
|
|
|
def test_output_format_rejects_unknown(self):
|
|
|
|
|
assert _get_command_tts_output_format({"format": "m4a"}) == DEFAULT_COMMAND_TTS_OUTPUT_FORMAT
|
|
|
|
|
|
|
|
|
|
def test_output_format_supported_set(self):
|
|
|
|
|
assert COMMAND_TTS_OUTPUT_FORMATS == frozenset({"mp3", "wav", "ogg", "flac"})
|
|
|
|
|
|
|
|
|
|
def test_voice_compatible_boolean(self):
|
|
|
|
|
assert _is_command_tts_voice_compatible({"voice_compatible": True}) is True
|
|
|
|
|
assert _is_command_tts_voice_compatible({"voice_compatible": False}) is False
|
|
|
|
|
|
|
|
|
|
def test_voice_compatible_string(self):
|
|
|
|
|
assert _is_command_tts_voice_compatible({"voice_compatible": "yes"}) is True
|
|
|
|
|
assert _is_command_tts_voice_compatible({"voice_compatible": "0"}) is False
|
|
|
|
|
|
|
|
|
|
def test_voice_compatible_default_off(self):
|
|
|
|
|
assert _is_command_tts_voice_compatible({}) is False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# _resolve_max_text_length for command providers
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
class TestMaxTextLengthForCommandProviders:
|
|
|
|
|
def test_default_for_command_provider(self):
|
feat(tts): add Piper as a native local TTS provider (closes #8508) (#17885)
Piper (OHF-Voice/piper1-gpl) is a fast, local neural TTS engine from the
Home Assistant project that supports 44 languages with zero API keys.
Adds it as a native built-in provider alongside edge/neutts/kittentts,
installable via 'hermes tools' with one keystroke.
What ships:
- New 'piper' built-in provider in tools/tts_tool.py
- Lazy import via _import_piper()
- Module-level voice cache keyed on (model_path, use_cuda) so switching
voices doesn't invalidate older cached voices
- _resolve_piper_voice_path() accepts either an absolute .onnx path or a
voice name (auto-downloaded on first use via 'python -m
piper.download_voices --download-dir <cache>')
- Voice cache at ~/.hermes/cache/piper-voices/ (profile-aware via
get_hermes_dir)
- Optional SynthesisConfig knobs: length_scale, noise_scale,
noise_w_scale, volume, normalize_audio, use_cuda — passed through
only when configured, so older piper-tts versions aren't broken
- WAV output then ffmpeg conversion path (same as neutts/kittentts) so
Telegram voice bubbles work when ffmpeg is present
- Piper added to BUILTIN_TTS_PROVIDERS so a user's
tts.providers.piper.command cannot shadow the native provider
(regression test included)
- 'hermes tools' wizard entry
- Piper appears under Voice and TTS as local free, with
'pip install piper-tts' auto-install via post_setup handler
- Prints voice-catalog URL and default-voice info after install
- config.yaml defaults
- tts.piper.voice defaults to en_US-lessac-medium
- Commented advanced knobs for discoverability
- Docs
- New 'Piper (local, 44 languages)' section in features/tts.md
explaining install path, voice switching, pre-downloaded voices,
and advanced knobs
- Piper listed in the ten-provider table and ffmpeg table
- Custom-command-providers section updated to drop the Piper example
(now native) and add a piper-custom example for users with their own
trained .onnx models
- overview.md bumps provider count to ten
- Tests (tests/tools/test_tts_piper.py, 16 tests)
- Registration (BUILTIN_TTS_PROVIDERS, PROVIDER_MAX_TEXT_LENGTH)
- _resolve_piper_voice_path across every branch: direct .onnx path,
cached voice name, fresh download with correct CLI args, download
failure, successful-exit-but-missing-files, empty voice to default
- _generate_piper_tts: loads voice once, reuses cache, voice-name
download wiring, advanced knobs flow through SynthesisConfig
- text_to_speech_tool end-to-end dispatch and missing-package error
- check_tts_requirements: piper availability toggles the return value
- Regression guard: piper cannot be shadowed by a command provider
with the same name
- Pre-existing test_tts_mistral test broadened to mock the new
piper/kittentts/command-provider checks (otherwise it false-passes
when piper is installed in the test venv)
E2E verification (live):
Actual pip install piper-tts, config piper + en_US-lessac-low,
text_to_speech_tool call, voice auto-downloaded from HuggingFace,
WAV synthesized, ffmpeg-converted to Ogg/Opus. Second call hits the
cache (~60ms). Cache dir populated with .onnx and .onnx.json.
This caught a real bug during development: the first pass used '-d' as
the download-dir flag; the actual piper.download_voices CLI wants
'--download-dir'. Fixed before PR opened.
2026-04-30 02:53:20 -07:00
|
|
|
cfg = {"providers": {"piper-cli": {"type": "command", "command": "x"}}}
|
|
|
|
|
assert _resolve_max_text_length("piper-cli", cfg) == DEFAULT_COMMAND_TTS_MAX_TEXT_LENGTH
|
feat(tts): add command-type provider registry under tts.providers.<name> (#17843)
Reshape of PR #17211 (@versun). Lets users wire any local or external
TTS CLI into Hermes without adding engine-specific Python code. Users
declare any number of named providers in config.yaml and switch between
them with tts.provider: <name>, alongside the built-ins (edge, openai,
elevenlabs, …).
Config shape:
tts:
provider: piper-en
providers:
piper-en:
type: command
command: 'piper -m ~/model.onnx -f {output_path} < {input_path}'
output_format: wav
Placeholders: {input_path}, {text_path}, {output_path}, {format},
{voice}, {model}, {speed}. Use {{ / }} for literal braces.
Key behavior:
- Built-in provider names always win — a tts.providers.openai entry
cannot shadow the native OpenAI provider.
- type: command is the default when command: is set.
- Placeholder values are shell-quote-aware (bare / single / double
context), so paths with spaces and shell metacharacters are safe.
- Default delivery is a regular audio attachment. voice_compatible: true
opts in to Telegram voice-bubble delivery via ffmpeg Opus conversion.
- Command failures (non-zero exit, timeout, empty output) surface to
the agent with stderr/stdout included so you can debug from chat.
- Process-tree kill on timeout (Unix killpg, Windows taskkill /T).
- max_text_length defaults to 5000 for command providers; override
under tts.providers.<name>.max_text_length.
Tests: tests/tools/test_tts_command_providers.py — 42 new tests cover
provider resolution, shell-quote context, placeholder rendering with
injection payloads, timeout, non-zero exit, empty output, voice_compatible
opt-in, and end-to-end dispatch through text_to_speech_tool. All 88
pre-existing TTS tests still pass.
Docs: new "Custom command providers" section in
website/docs/user-guide/features/tts.md with three worked examples
(Piper, VoxCPM, MLX-Kokoro), placeholder reference, optional keys,
behavior notes, and security caveat.
E2E-verified live: isolated HERMES_HOME, command provider declared in
config.yaml, text_to_speech_tool dispatches through the registered
shell command and the output file is produced as expected.
Co-authored-by: Versun <me+github7604@versun.org>
2026-04-30 02:29:08 -07:00
|
|
|
|
|
|
|
|
def test_override_under_providers(self):
|
feat(tts): add Piper as a native local TTS provider (closes #8508) (#17885)
Piper (OHF-Voice/piper1-gpl) is a fast, local neural TTS engine from the
Home Assistant project that supports 44 languages with zero API keys.
Adds it as a native built-in provider alongside edge/neutts/kittentts,
installable via 'hermes tools' with one keystroke.
What ships:
- New 'piper' built-in provider in tools/tts_tool.py
- Lazy import via _import_piper()
- Module-level voice cache keyed on (model_path, use_cuda) so switching
voices doesn't invalidate older cached voices
- _resolve_piper_voice_path() accepts either an absolute .onnx path or a
voice name (auto-downloaded on first use via 'python -m
piper.download_voices --download-dir <cache>')
- Voice cache at ~/.hermes/cache/piper-voices/ (profile-aware via
get_hermes_dir)
- Optional SynthesisConfig knobs: length_scale, noise_scale,
noise_w_scale, volume, normalize_audio, use_cuda — passed through
only when configured, so older piper-tts versions aren't broken
- WAV output then ffmpeg conversion path (same as neutts/kittentts) so
Telegram voice bubbles work when ffmpeg is present
- Piper added to BUILTIN_TTS_PROVIDERS so a user's
tts.providers.piper.command cannot shadow the native provider
(regression test included)
- 'hermes tools' wizard entry
- Piper appears under Voice and TTS as local free, with
'pip install piper-tts' auto-install via post_setup handler
- Prints voice-catalog URL and default-voice info after install
- config.yaml defaults
- tts.piper.voice defaults to en_US-lessac-medium
- Commented advanced knobs for discoverability
- Docs
- New 'Piper (local, 44 languages)' section in features/tts.md
explaining install path, voice switching, pre-downloaded voices,
and advanced knobs
- Piper listed in the ten-provider table and ffmpeg table
- Custom-command-providers section updated to drop the Piper example
(now native) and add a piper-custom example for users with their own
trained .onnx models
- overview.md bumps provider count to ten
- Tests (tests/tools/test_tts_piper.py, 16 tests)
- Registration (BUILTIN_TTS_PROVIDERS, PROVIDER_MAX_TEXT_LENGTH)
- _resolve_piper_voice_path across every branch: direct .onnx path,
cached voice name, fresh download with correct CLI args, download
failure, successful-exit-but-missing-files, empty voice to default
- _generate_piper_tts: loads voice once, reuses cache, voice-name
download wiring, advanced knobs flow through SynthesisConfig
- text_to_speech_tool end-to-end dispatch and missing-package error
- check_tts_requirements: piper availability toggles the return value
- Regression guard: piper cannot be shadowed by a command provider
with the same name
- Pre-existing test_tts_mistral test broadened to mock the new
piper/kittentts/command-provider checks (otherwise it false-passes
when piper is installed in the test venv)
E2E verification (live):
Actual pip install piper-tts, config piper + en_US-lessac-low,
text_to_speech_tool call, voice auto-downloaded from HuggingFace,
WAV synthesized, ffmpeg-converted to Ogg/Opus. Second call hits the
cache (~60ms). Cache dir populated with .onnx and .onnx.json.
This caught a real bug during development: the first pass used '-d' as
the download-dir flag; the actual piper.download_voices CLI wants
'--download-dir'. Fixed before PR opened.
2026-04-30 02:53:20 -07:00
|
|
|
cfg = {"providers": {"piper-cli": {"type": "command", "command": "x", "max_text_length": 2500}}}
|
|
|
|
|
assert _resolve_max_text_length("piper-cli", cfg) == 2500
|
feat(tts): add command-type provider registry under tts.providers.<name> (#17843)
Reshape of PR #17211 (@versun). Lets users wire any local or external
TTS CLI into Hermes without adding engine-specific Python code. Users
declare any number of named providers in config.yaml and switch between
them with tts.provider: <name>, alongside the built-ins (edge, openai,
elevenlabs, …).
Config shape:
tts:
provider: piper-en
providers:
piper-en:
type: command
command: 'piper -m ~/model.onnx -f {output_path} < {input_path}'
output_format: wav
Placeholders: {input_path}, {text_path}, {output_path}, {format},
{voice}, {model}, {speed}. Use {{ / }} for literal braces.
Key behavior:
- Built-in provider names always win — a tts.providers.openai entry
cannot shadow the native OpenAI provider.
- type: command is the default when command: is set.
- Placeholder values are shell-quote-aware (bare / single / double
context), so paths with spaces and shell metacharacters are safe.
- Default delivery is a regular audio attachment. voice_compatible: true
opts in to Telegram voice-bubble delivery via ffmpeg Opus conversion.
- Command failures (non-zero exit, timeout, empty output) surface to
the agent with stderr/stdout included so you can debug from chat.
- Process-tree kill on timeout (Unix killpg, Windows taskkill /T).
- max_text_length defaults to 5000 for command providers; override
under tts.providers.<name>.max_text_length.
Tests: tests/tools/test_tts_command_providers.py — 42 new tests cover
provider resolution, shell-quote context, placeholder rendering with
injection payloads, timeout, non-zero exit, empty output, voice_compatible
opt-in, and end-to-end dispatch through text_to_speech_tool. All 88
pre-existing TTS tests still pass.
Docs: new "Custom command providers" section in
website/docs/user-guide/features/tts.md with three worked examples
(Piper, VoxCPM, MLX-Kokoro), placeholder reference, optional keys,
behavior notes, and security caveat.
E2E-verified live: isolated HERMES_HOME, command provider declared in
config.yaml, text_to_speech_tool dispatches through the registered
shell command and the output file is produced as expected.
Co-authored-by: Versun <me+github7604@versun.org>
2026-04-30 02:29:08 -07:00
|
|
|
|
|
|
|
|
def test_override_under_legacy_tts_name_block(self):
|
feat(tts): add Piper as a native local TTS provider (closes #8508) (#17885)
Piper (OHF-Voice/piper1-gpl) is a fast, local neural TTS engine from the
Home Assistant project that supports 44 languages with zero API keys.
Adds it as a native built-in provider alongside edge/neutts/kittentts,
installable via 'hermes tools' with one keystroke.
What ships:
- New 'piper' built-in provider in tools/tts_tool.py
- Lazy import via _import_piper()
- Module-level voice cache keyed on (model_path, use_cuda) so switching
voices doesn't invalidate older cached voices
- _resolve_piper_voice_path() accepts either an absolute .onnx path or a
voice name (auto-downloaded on first use via 'python -m
piper.download_voices --download-dir <cache>')
- Voice cache at ~/.hermes/cache/piper-voices/ (profile-aware via
get_hermes_dir)
- Optional SynthesisConfig knobs: length_scale, noise_scale,
noise_w_scale, volume, normalize_audio, use_cuda — passed through
only when configured, so older piper-tts versions aren't broken
- WAV output then ffmpeg conversion path (same as neutts/kittentts) so
Telegram voice bubbles work when ffmpeg is present
- Piper added to BUILTIN_TTS_PROVIDERS so a user's
tts.providers.piper.command cannot shadow the native provider
(regression test included)
- 'hermes tools' wizard entry
- Piper appears under Voice and TTS as local free, with
'pip install piper-tts' auto-install via post_setup handler
- Prints voice-catalog URL and default-voice info after install
- config.yaml defaults
- tts.piper.voice defaults to en_US-lessac-medium
- Commented advanced knobs for discoverability
- Docs
- New 'Piper (local, 44 languages)' section in features/tts.md
explaining install path, voice switching, pre-downloaded voices,
and advanced knobs
- Piper listed in the ten-provider table and ffmpeg table
- Custom-command-providers section updated to drop the Piper example
(now native) and add a piper-custom example for users with their own
trained .onnx models
- overview.md bumps provider count to ten
- Tests (tests/tools/test_tts_piper.py, 16 tests)
- Registration (BUILTIN_TTS_PROVIDERS, PROVIDER_MAX_TEXT_LENGTH)
- _resolve_piper_voice_path across every branch: direct .onnx path,
cached voice name, fresh download with correct CLI args, download
failure, successful-exit-but-missing-files, empty voice to default
- _generate_piper_tts: loads voice once, reuses cache, voice-name
download wiring, advanced knobs flow through SynthesisConfig
- text_to_speech_tool end-to-end dispatch and missing-package error
- check_tts_requirements: piper availability toggles the return value
- Regression guard: piper cannot be shadowed by a command provider
with the same name
- Pre-existing test_tts_mistral test broadened to mock the new
piper/kittentts/command-provider checks (otherwise it false-passes
when piper is installed in the test venv)
E2E verification (live):
Actual pip install piper-tts, config piper + en_US-lessac-low,
text_to_speech_tool call, voice auto-downloaded from HuggingFace,
WAV synthesized, ffmpeg-converted to Ogg/Opus. Second call hits the
cache (~60ms). Cache dir populated with .onnx and .onnx.json.
This caught a real bug during development: the first pass used '-d' as
the download-dir flag; the actual piper.download_voices CLI wants
'--download-dir'. Fixed before PR opened.
2026-04-30 02:53:20 -07:00
|
|
|
cfg = {"piper-cli": {"type": "command", "command": "x", "max_text_length": 7777}}
|
|
|
|
|
assert _resolve_max_text_length("piper-cli", cfg) == 7777
|
feat(tts): add command-type provider registry under tts.providers.<name> (#17843)
Reshape of PR #17211 (@versun). Lets users wire any local or external
TTS CLI into Hermes without adding engine-specific Python code. Users
declare any number of named providers in config.yaml and switch between
them with tts.provider: <name>, alongside the built-ins (edge, openai,
elevenlabs, …).
Config shape:
tts:
provider: piper-en
providers:
piper-en:
type: command
command: 'piper -m ~/model.onnx -f {output_path} < {input_path}'
output_format: wav
Placeholders: {input_path}, {text_path}, {output_path}, {format},
{voice}, {model}, {speed}. Use {{ / }} for literal braces.
Key behavior:
- Built-in provider names always win — a tts.providers.openai entry
cannot shadow the native OpenAI provider.
- type: command is the default when command: is set.
- Placeholder values are shell-quote-aware (bare / single / double
context), so paths with spaces and shell metacharacters are safe.
- Default delivery is a regular audio attachment. voice_compatible: true
opts in to Telegram voice-bubble delivery via ffmpeg Opus conversion.
- Command failures (non-zero exit, timeout, empty output) surface to
the agent with stderr/stdout included so you can debug from chat.
- Process-tree kill on timeout (Unix killpg, Windows taskkill /T).
- max_text_length defaults to 5000 for command providers; override
under tts.providers.<name>.max_text_length.
Tests: tests/tools/test_tts_command_providers.py — 42 new tests cover
provider resolution, shell-quote context, placeholder rendering with
injection payloads, timeout, non-zero exit, empty output, voice_compatible
opt-in, and end-to-end dispatch through text_to_speech_tool. All 88
pre-existing TTS tests still pass.
Docs: new "Custom command providers" section in
website/docs/user-guide/features/tts.md with three worked examples
(Piper, VoxCPM, MLX-Kokoro), placeholder reference, optional keys,
behavior notes, and security caveat.
E2E-verified live: isolated HERMES_HOME, command provider declared in
config.yaml, text_to_speech_tool dispatches through the registered
shell command and the output file is produced as expected.
Co-authored-by: Versun <me+github7604@versun.org>
2026-04-30 02:29:08 -07:00
|
|
|
|
|
|
|
|
def test_non_command_unknown_provider_still_falls_back(self):
|
|
|
|
|
assert _resolve_max_text_length("unknown", {}) > 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# _shell_quote_context / template rendering
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
class TestShellQuoteContext:
|
|
|
|
|
def test_bare_context(self):
|
|
|
|
|
tpl = 'tts {output_path}'
|
|
|
|
|
pos = tpl.index("{output_path}")
|
|
|
|
|
assert _shell_quote_context(tpl, pos) is None
|
|
|
|
|
|
|
|
|
|
def test_inside_single_quotes(self):
|
|
|
|
|
tpl = "tts '{output_path}'"
|
|
|
|
|
pos = tpl.index("{output_path}")
|
|
|
|
|
assert _shell_quote_context(tpl, pos) == "'"
|
|
|
|
|
|
|
|
|
|
def test_inside_double_quotes(self):
|
|
|
|
|
tpl = 'tts "{output_path}"'
|
|
|
|
|
pos = tpl.index("{output_path}")
|
|
|
|
|
assert _shell_quote_context(tpl, pos) == '"'
|
|
|
|
|
|
|
|
|
|
def test_escaped_double_quote_inside_double(self):
|
|
|
|
|
tpl = r'tts "foo \" {output_path}"'
|
|
|
|
|
pos = tpl.index("{output_path}")
|
|
|
|
|
assert _shell_quote_context(tpl, pos) == '"'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestRenderCommandTtsTemplate:
|
|
|
|
|
def test_substitutes_all_placeholders(self):
|
|
|
|
|
placeholders = {
|
|
|
|
|
"input_path": "/tmp/in.txt",
|
|
|
|
|
"text_path": "/tmp/in.txt",
|
|
|
|
|
"output_path": "/tmp/out.mp3",
|
|
|
|
|
"format": "mp3",
|
|
|
|
|
"voice": "af_sky",
|
|
|
|
|
"model": "tiny",
|
|
|
|
|
"speed": "1.0",
|
|
|
|
|
}
|
|
|
|
|
rendered = _render_command_tts_template(
|
|
|
|
|
"tts --voice {voice} --in {input_path} --out {output_path}",
|
|
|
|
|
placeholders,
|
|
|
|
|
)
|
|
|
|
|
assert "af_sky" in rendered
|
|
|
|
|
assert "/tmp/out.mp3" in rendered
|
|
|
|
|
|
|
|
|
|
def test_quotes_paths_with_spaces(self):
|
|
|
|
|
placeholders = {
|
|
|
|
|
"input_path": "/tmp/Jane Doe/in.txt",
|
|
|
|
|
"text_path": "/tmp/Jane Doe/in.txt",
|
|
|
|
|
"output_path": "/tmp/out.mp3",
|
|
|
|
|
"format": "mp3",
|
|
|
|
|
"voice": "",
|
|
|
|
|
"model": "",
|
|
|
|
|
"speed": "1.0",
|
|
|
|
|
}
|
|
|
|
|
rendered = _render_command_tts_template(
|
|
|
|
|
"tts --in {input_path} --out {output_path}",
|
|
|
|
|
placeholders,
|
|
|
|
|
)
|
|
|
|
|
# shlex.quote wraps space-containing paths in single quotes on POSIX.
|
|
|
|
|
if os.name != "nt":
|
|
|
|
|
assert "'/tmp/Jane Doe/in.txt'" in rendered
|
|
|
|
|
|
|
|
|
|
def test_literal_braces_survive(self):
|
|
|
|
|
placeholders = {
|
|
|
|
|
"input_path": "/tmp/in.txt", "text_path": "/tmp/in.txt",
|
|
|
|
|
"output_path": "/tmp/out.mp3", "format": "mp3",
|
|
|
|
|
"voice": "", "model": "", "speed": "1.0",
|
|
|
|
|
}
|
|
|
|
|
rendered = _render_command_tts_template(
|
|
|
|
|
"echo '{{not a placeholder}}' && tts --in {input_path}",
|
|
|
|
|
placeholders,
|
|
|
|
|
)
|
|
|
|
|
assert "{not a placeholder}" in rendered
|
|
|
|
|
|
|
|
|
|
def test_injection_is_neutralized(self):
|
|
|
|
|
"""Embedded shell metacharacters in a placeholder value must be quoted."""
|
|
|
|
|
placeholders = {
|
|
|
|
|
"input_path": "/tmp/in.txt", "text_path": "/tmp/in.txt",
|
|
|
|
|
"output_path": "/tmp/out; rm -rf /",
|
|
|
|
|
"format": "mp3",
|
|
|
|
|
"voice": "$(whoami)", "model": "", "speed": "1.0",
|
|
|
|
|
}
|
|
|
|
|
rendered = _render_command_tts_template(
|
|
|
|
|
"tts --voice {voice} --out {output_path}",
|
|
|
|
|
placeholders,
|
|
|
|
|
)
|
|
|
|
|
# The injection payload must not appear unquoted in the rendered
|
|
|
|
|
# command. On POSIX shlex.quote wraps the value in single quotes.
|
|
|
|
|
if os.name != "nt":
|
|
|
|
|
assert "'$(whoami)'" in rendered or "'\\''" in rendered
|
|
|
|
|
assert "; rm -rf /" not in rendered.replace(
|
|
|
|
|
"'/tmp/out; rm -rf /'", "",
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
def test_preserves_shell_quoting_style(self):
|
|
|
|
|
placeholders = {
|
|
|
|
|
"input_path": "/tmp/in.txt", "text_path": "/tmp/in.txt",
|
|
|
|
|
"output_path": "/tmp/out.mp3", "format": "mp3",
|
|
|
|
|
"voice": "bob's voice", "model": "", "speed": "1.0",
|
|
|
|
|
}
|
|
|
|
|
# When the template wraps the placeholder in double quotes we must
|
|
|
|
|
# escape for that context, not collapse to single-quoted form.
|
|
|
|
|
rendered = _render_command_tts_template(
|
|
|
|
|
'tts --voice "{voice}"',
|
|
|
|
|
placeholders,
|
|
|
|
|
)
|
|
|
|
|
assert '"bob\'s voice"' in rendered
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# End-to-end: _generate_command_tts
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
class TestGenerateCommandTts:
|
|
|
|
|
def test_writes_output_file(self, tmp_path):
|
|
|
|
|
out = tmp_path / "clip.mp3"
|
|
|
|
|
config = {"command": _python_copy_command()}
|
|
|
|
|
result = _generate_command_tts(
|
|
|
|
|
"hello world",
|
|
|
|
|
str(out),
|
|
|
|
|
"py-copy",
|
|
|
|
|
config,
|
|
|
|
|
{},
|
|
|
|
|
)
|
|
|
|
|
assert result == str(out)
|
|
|
|
|
assert out.exists()
|
|
|
|
|
# The command copied the input text file over to output, so it
|
|
|
|
|
# contains the original UTF-8 text.
|
|
|
|
|
assert out.read_text(encoding="utf-8") == "hello world"
|
|
|
|
|
|
|
|
|
|
def test_empty_command_raises(self, tmp_path):
|
|
|
|
|
with pytest.raises(ValueError, match="is not configured"):
|
|
|
|
|
_generate_command_tts(
|
|
|
|
|
"hello",
|
|
|
|
|
str(tmp_path / "x.mp3"),
|
|
|
|
|
"empty",
|
|
|
|
|
{"command": " "},
|
|
|
|
|
{},
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
def test_nonzero_exit_raises_runtime(self, tmp_path):
|
|
|
|
|
config = {"command": f'"{sys.executable}" -c "import sys; sys.exit(3)"'}
|
|
|
|
|
with pytest.raises(RuntimeError, match="exited with code 3"):
|
|
|
|
|
_generate_command_tts(
|
|
|
|
|
"hello",
|
|
|
|
|
str(tmp_path / "x.mp3"),
|
|
|
|
|
"failing",
|
|
|
|
|
config,
|
|
|
|
|
{},
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
def test_empty_output_raises_runtime(self, tmp_path):
|
|
|
|
|
# This command completes successfully but writes nothing.
|
|
|
|
|
config = {"command": f'"{sys.executable}" -c "pass"'}
|
|
|
|
|
with pytest.raises(RuntimeError, match="produced no output"):
|
|
|
|
|
_generate_command_tts(
|
|
|
|
|
"hello",
|
|
|
|
|
str(tmp_path / "x.mp3"),
|
|
|
|
|
"silent",
|
|
|
|
|
config,
|
|
|
|
|
{},
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
@pytest.mark.skipif(os.name == "nt", reason="POSIX-only timeout semantics")
|
|
|
|
|
def test_timeout_raises_runtime(self, tmp_path):
|
|
|
|
|
config = {
|
|
|
|
|
"command": f'"{sys.executable}" -c "import time; time.sleep(10)"',
|
|
|
|
|
"timeout": 1,
|
|
|
|
|
}
|
|
|
|
|
with pytest.raises(RuntimeError, match="timed out"):
|
|
|
|
|
_generate_command_tts(
|
|
|
|
|
"hello",
|
|
|
|
|
str(tmp_path / "x.mp3"),
|
|
|
|
|
"slow",
|
|
|
|
|
config,
|
|
|
|
|
{},
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# text_to_speech_tool integration
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
class TestTextToSpeechToolWithCommandProvider:
|
|
|
|
|
def test_command_provider_dispatches_end_to_end(self, tmp_path):
|
|
|
|
|
cfg = {
|
|
|
|
|
"tts": {
|
|
|
|
|
"provider": "py-copy",
|
|
|
|
|
"providers": {
|
|
|
|
|
"py-copy": {
|
|
|
|
|
"type": "command",
|
|
|
|
|
"command": _python_copy_command(),
|
|
|
|
|
"output_format": "mp3",
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
out = tmp_path / "clip.mp3"
|
|
|
|
|
|
|
|
|
|
# Patch the config loader used by the tool so we don't touch disk.
|
|
|
|
|
def fake_load():
|
|
|
|
|
return cfg["tts"]
|
|
|
|
|
|
|
|
|
|
with patch("tools.tts_tool._load_tts_config", fake_load):
|
|
|
|
|
result = text_to_speech_tool(text="hi", output_path=str(out))
|
|
|
|
|
data = json.loads(result)
|
|
|
|
|
assert data["success"] is True, data
|
|
|
|
|
assert data["provider"] == "py-copy"
|
|
|
|
|
assert data["voice_compatible"] is False
|
|
|
|
|
assert Path(data["file_path"]).exists()
|
|
|
|
|
|
|
|
|
|
def test_voice_compatible_opt_in_toggles_flag(self, tmp_path):
|
|
|
|
|
"""voice_compatible=true is reflected in the response when the
|
|
|
|
|
file is already .ogg (no ffmpeg needed)."""
|
|
|
|
|
cfg = {
|
|
|
|
|
"provider": "py-copy-ogg",
|
|
|
|
|
"providers": {
|
|
|
|
|
"py-copy-ogg": {
|
|
|
|
|
"type": "command",
|
|
|
|
|
"command": _python_copy_command(),
|
|
|
|
|
"output_format": "ogg",
|
|
|
|
|
"voice_compatible": True,
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
out = tmp_path / "clip.ogg"
|
|
|
|
|
|
|
|
|
|
with patch("tools.tts_tool._load_tts_config", return_value=cfg):
|
|
|
|
|
result = text_to_speech_tool(text="hi", output_path=str(out))
|
|
|
|
|
data = json.loads(result)
|
|
|
|
|
assert data["success"] is True
|
|
|
|
|
assert data["voice_compatible"] is True
|
|
|
|
|
assert data["media_tag"].startswith("[[audio_as_voice]]")
|
|
|
|
|
|
|
|
|
|
def test_missing_command_falls_through_to_builtin(self, tmp_path):
|
|
|
|
|
"""A provider entry with an empty command is not a command
|
|
|
|
|
provider; the tool should not raise a "command not configured"
|
|
|
|
|
error but fall through to the built-in resolution path."""
|
|
|
|
|
cfg = {
|
|
|
|
|
"provider": "broken",
|
|
|
|
|
"providers": {
|
|
|
|
|
"broken": {"type": "command", "command": " "},
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
with patch("tools.tts_tool._load_tts_config", return_value=cfg):
|
|
|
|
|
result = text_to_speech_tool(text="hi", output_path=str(tmp_path / "x.mp3"))
|
|
|
|
|
data = json.loads(result)
|
|
|
|
|
# The response should not carry the command-provider error text.
|
|
|
|
|
err = (data.get("error") or "").lower()
|
|
|
|
|
assert "tts.providers.broken.command is not configured" not in err
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestCheckTtsRequirements:
|
|
|
|
|
def test_configured_command_provider_satisfies_requirement(self):
|
|
|
|
|
cfg = {"providers": {"x": {"type": "command", "command": "echo x"}}}
|
|
|
|
|
with patch("tools.tts_tool._load_tts_config", return_value=cfg):
|
|
|
|
|
assert check_tts_requirements() is True
|