fix: address PR review round 4 — remove web UI, fix audio/import/interface issues

Remove web UI gateway (web.py, tests, docs, toolset, env vars, Platform.WEB enum, and the /remote-control command that started it on demand) per maintainer request — Nous is building their own official chat UI. Fix 1: Replace sd.wait() with a polling pattern in play_audio_file() to prevent an indefinite hang when the audio device stalls (consistent with play_beep()). Fix 2: Use importlib.util.find_spec() for faster_whisper/openai availability checks instead of module-level imports that trigger heavy native library loading (CUDA/cuDNN) at import time. Fix 3: Remove the inspect.signature() hack in _send_voice_reply() — add **kwargs to Telegram send_voice() so all adapters accept metadata uniformly. Fix 4: Make session loading resilient to removed platform enum values — skip entries with unknown platforms instead of letting a single stale entry abort loading of all remaining sessions.
2026-04-28 06:51:16 +08:00 · 2026-03-14 09:06:52 +03:00
parent 1ad5e0ed15
commit 35748a2fb0
17 changed files with 55 additions and 2930 deletions
--- a/.env.example
+++ b/.env.example
@@ -213,13 +213,6 @@ VOICE_TOOLS_OPENAI_KEY=
 # EMAIL_ALLOWED_USERS=your@email.com
 # EMAIL_HOME_ADDRESS=your@email.com
 # Web UI (browser-based chat interface on local network)
 # Access from phone/tablet/desktop at http://<your-ip>:8765
 # WEB_UI_ENABLED=false
 # WEB_UI_PORT=8765
 # WEB_UI_HOST=127.0.0.1            # Use 0.0.0.0 to expose on LAN
 # WEB_UI_TOKEN=                # Auto-generated if empty
 # Gateway-wide: allow ALL users without an allowlist (default: false = deny)
 # Only set to true if you intentionally want open access.
 # GATEWAY_ALLOW_ALL_USERS=false
--- a/gateway/config.py
+++ b/gateway/config.py
@@ -31,7 +31,6 @@ class Platform(Enum):
    SIGNAL = "signal"
    HOMEASSISTANT = "homeassistant"
    EMAIL = "email"
    WEB = "web"
@dataclass
@@ -177,9 +176,6 @@ class GatewayConfig:
            # Email uses extra dict for config (address + imap_host + smtp_host)
            elif platform == Platform.EMAIL and config.extra.get("address"):
                connected.append(platform)
            # Web UI uses enabled flag only
            elif platform == Platform.WEB:
                connected.append(platform)
        return connected
    def get_home_channel(self, platform: Platform) -> Optional[HomeChannel]:
@@ -470,18 +466,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
                name=os.getenv("EMAIL_HOME_ADDRESS_NAME", "Home"),
            )
    # Web UI
    web_enabled = os.getenv("WEB_UI_ENABLED", "").lower() in ("true", "1", "yes")
    if web_enabled:
        if Platform.WEB not in config.platforms:
            config.platforms[Platform.WEB] = PlatformConfig()
        config.platforms[Platform.WEB].enabled = True
        config.platforms[Platform.WEB].extra.update({
            "port": int(os.getenv("WEB_UI_PORT", "8765")),
            "host": os.getenv("WEB_UI_HOST", "") or "127.0.0.1",
            "token": os.getenv("WEB_UI_TOKEN", ""),
        })
    # Session settings
    idle_minutes = os.getenv("SESSION_IDLE_MINUTES")
    if idle_minutes:
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -311,6 +311,7 @@ class TelegramAdapter(BasePlatformAdapter):
        caption: Optional[str] = None,
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs,
    ) -> SendResult:
        """Send audio as a native Telegram voice message or audio file."""
        if not self._bot:
--- a/gateway/platforms/web.py
+++ b/gateway/platforms/web.py
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -829,13 +829,6 @@ class GatewayRunner:
                return None
            return EmailAdapter(config)
        elif platform == Platform.WEB:
            from gateway.platforms.web import WebAdapter, check_web_requirements
            if not check_web_requirements():
                logger.warning("Web: aiohttp not installed. Run: pip install aiohttp")
                return None
            return WebAdapter(config)
        return None
    def _is_user_authorized(self, source: SessionSource) -> bool:
@@ -855,11 +848,6 @@ class GatewayRunner:
        if source.platform == Platform.HOMEASSISTANT:
            return True
        # Web UI users are authenticated via token at the WebSocket level.
        # No additional allowlist check needed.
        if source.platform == Platform.WEB:
            return True
        user_id = source.user_id
        if not user_id:
            return False
@@ -978,7 +966,7 @@ class GatewayRunner:
                          "personality", "retry", "undo", "sethome", "set-home",
                          "compress", "usage", "insights", "reload-mcp", "reload_mcp",
                          "update", "title", "resume", "provider", "rollback",
-                          "background", "reasoning", "voice", "remote-control", "remote_control"}
+                          "background", "reasoning", "voice"}
        if command and command in _known_commands:
            await self.hooks.emit(f"command:{command}", {
                "platform": source.platform.value if source.platform else "",
@@ -1053,10 +1041,6 @@ class GatewayRunner:
        if command == "voice":
            return await self._handle_voice_command(event)
        if command in ("remote-control", "remote_control"):
            return await self._handle_remote_control_command(event)
        # User-defined quick commands (bypass agent loop, no LLM call)
        if command:
            quick_commands = self.config.get("quick_commands", {})
@@ -1741,7 +1725,6 @@ class GatewayRunner:
            "`/rollback [number]` — List or restore filesystem checkpoints",
            "`/background <prompt>` — Run a prompt in a separate background session",
            "`/voice [on|off|tts|status]` — Toggle voice reply mode",
            "`/remote-control [port] [token]` — Start web UI for remote access",
            "`/reload-mcp` — Reload MCP servers from config",
            "`/update` — Update Hermes Agent to the latest version",
            "`/help` — Show this message",
@@ -2415,10 +2398,6 @@ class GatewayRunner:
                }
                if event.source.thread_id:
                    send_kwargs["metadata"] = {"thread_id": event.source.thread_id}
                import inspect
                sig = inspect.signature(adapter.send_voice)
                if "metadata" not in sig.parameters:
                    send_kwargs.pop("metadata", None)
                await adapter.send_voice(**send_kwargs)
        except Exception as e:
            logger.warning("Auto voice reply failed: %s", e, exc_info=True)
@@ -2488,62 +2467,6 @@ class GatewayRunner:
            )
        return f"❌ {result['error']}"
    async def _handle_remote_control_command(self, event: MessageEvent) -> str:
        """Handle /remote-control — start or show the web UI for remote access."""
        from gateway.config import Platform, PlatformConfig
        is_dm = event.source and event.source.chat_type == "dm"
        # Already running?
        if Platform.WEB in self.adapters:
            adapter = self.adapters[Platform.WEB]
            local_ip = adapter._get_local_ip()
            token_display = adapter._token if is_dm else "(hidden — use in DM to see token)"
            return (
                f"Web UI already running.\n"
                f"URL: http://{local_ip}:{adapter._port}\n"
                f"Token: {token_display}"
            )
        # Start web adapter on the fly
        try:
            from gateway.platforms.web import WebAdapter, check_web_requirements
            if not check_web_requirements():
                return "Web UI requires aiohttp. Run: pip install aiohttp"
            args = event.get_command_args().strip()
            port = 8765
            token = ""
            for part in args.split():
                if part.isdigit():
                    port = int(part)
                elif part and not part.startswith("-"):
                    token = part
            web_config = PlatformConfig(
                enabled=True,
                extra={"port": port, "host": "127.0.0.1", "token": token},
            )
            adapter = WebAdapter(web_config)
            adapter.set_message_handler(self._handle_message)
            success = await adapter.connect()
            if not success:
                return f"Failed to start Web UI on port {port}. Port may be in use."
            self.adapters[Platform.WEB] = adapter
            local_ip = adapter._get_local_ip()
            token_display = adapter._token if is_dm else "(hidden — use in DM to see token)"
            return (
                f"Web UI started!\n"
                f"URL: http://{local_ip}:{adapter._port}\n"
                f"Token: {token_display}\n"
                f"Open this URL on your phone or any device on the same network."
            )
        except Exception as e:
            logger.error("Failed to start web UI: %s", e, exc_info=True)
            return f"Failed to start Web UI: {e}"
    async def _handle_background_command(self, event: MessageEvent) -> str:
        """Handle /background <prompt> — run a prompt in a separate background session.
@@ -2607,7 +2530,6 @@ class GatewayRunner:
                Platform.SIGNAL: "hermes-signal",
                Platform.HOMEASSISTANT: "hermes-homeassistant",
                Platform.EMAIL: "hermes-email",
                Platform.WEB: "hermes-web",
            }
            platform_toolsets_config = {}
            try:
@@ -2629,7 +2551,6 @@ class GatewayRunner:
                Platform.SIGNAL: "signal",
                Platform.HOMEASSISTANT: "homeassistant",
                Platform.EMAIL: "email",
                Platform.WEB: "web",
            }.get(source.platform, "telegram")
            config_toolsets = platform_toolsets_config.get(platform_config_key)
@@ -3517,7 +3438,6 @@ class GatewayRunner:
            Platform.SIGNAL: "hermes-signal",
            Platform.HOMEASSISTANT: "hermes-homeassistant",
            Platform.EMAIL: "hermes-email",
            Platform.WEB: "hermes-web",
        }
        # Try to load platform_toolsets from config
@@ -3542,7 +3462,6 @@ class GatewayRunner:
            Platform.SIGNAL: "signal",
            Platform.HOMEASSISTANT: "homeassistant",
            Platform.EMAIL: "email",
            Platform.WEB: "web",
        }.get(source.platform, "telegram")
        # Use config override if present (list of toolsets), otherwise hardcoded default
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -383,7 +383,11 @@ class SessionStore:
                with open(sessions_file, "r", encoding="utf-8") as f:
                    data = json.load(f)
                    for key, entry_data in data.items():
-                        self._entries[key] = SessionEntry.from_dict(entry_data)
+                        try:
                            self._entries[key] = SessionEntry.from_dict(entry_data)
                        except (ValueError, KeyError):
                            # Skip entries with unknown/removed platform values
                            continue
            except Exception as e:
                print(f"[gateway] Warning: Failed to load sessions: {e}")
--- a/tests/gateway/test_voice_command.py
+++ b/tests/gateway/test_voice_command.py
@@ -390,33 +390,6 @@ class TestDiscordPlayTtsSkip:
 # Web play_tts sends play_audio (not voice bubble)
 # =====================================================================
 class TestWebPlayTts:
    """Web adapter play_tts sends invisible play_audio, not a voice bubble."""
    @pytest.mark.asyncio
    async def test_play_tts_sends_play_audio(self, tmp_path):
        from gateway.platforms.web import WebAdapter
        from gateway.config import PlatformConfig
        config = PlatformConfig(enabled=True, extra={
            "port": 0, "host": "127.0.0.1", "token": "tok",
        })
        adapter = WebAdapter(config)
        adapter._broadcast = AsyncMock()
        adapter._media_dir = tmp_path / "media"
        adapter._media_dir.mkdir()
        audio_file = tmp_path / "test.ogg"
        audio_file.write_bytes(b"fake audio")
        result = await adapter.play_tts(chat_id="web", audio_path=str(audio_file))
        assert result.success is True
        payload = adapter._broadcast.call_args[0][0]
        assert payload["type"] == "play_audio"
        assert "/media/" in payload["url"]
 # =====================================================================
 # Help text + known commands
 # =====================================================================
--- a/tests/gateway/test_web.py
+++ b/tests/gateway/test_web.py
@@ -1,926 +0,0 @@
 """Tests for the Web UI gateway platform adapter.
 Covers:
 1. Platform enum exists with correct value
 2. Config loading from env vars via _apply_env_overrides
 3. WebAdapter init and config parsing (port, host, token)
 4. Token auto-generation when not provided
 5. check_web_requirements function
 6. HTTP server start/stop (connect/disconnect)
 7. Auth screen served on GET /
 8. Media directory creation and cleanup
 9. WebSocket auth handshake (auth_ok / auth_fail)
 10. WebSocket message routing (text, voice)
 11. Auto-TTS play_tts sends invisible playback
 12. Authorization bypass (Web platform always authorized)
 13. Toolset registration (hermes-web in toolset maps)
 14. LAN IP detection (_get_local_ip / _get_local_ips)
 15. Security: path traversal sanitization
 16. Security: media endpoint authentication
 17. Security: hmac.compare_digest for token comparison
 18. Security: DOMPurify XSS prevention
 19. Security: default bind to 127.0.0.1
 20. Security: /remote-control token hiding in group chats
 21. Network: VPN/multi-interface IP detection edge cases
 22. Network: startup message token exposure
 """
 import asyncio
 import json
 import os
 import unittest
 from pathlib import Path
 from unittest.mock import patch, MagicMock, AsyncMock
 import pytest
 from gateway.config import GatewayConfig, Platform, PlatformConfig, _apply_env_overrides
 from gateway.platforms.base import SendResult
 # ===========================================================================
 # 1. Platform Enum
 # ===========================================================================
 class TestPlatformEnum(unittest.TestCase):
    """Verify WEB is in the Platform enum."""
    def test_web_in_platform_enum(self):
        self.assertEqual(Platform.WEB.value, "web")
    def test_web_distinct_from_others(self):
        platforms = [p.value for p in Platform]
        self.assertIn("web", platforms)
        self.assertEqual(platforms.count("web"), 1)
 # ===========================================================================
 # 2. Config loading from env vars
 # ===========================================================================
 class TestConfigEnvOverrides(unittest.TestCase):
    """Verify web UI config is loaded from environment variables."""
    @patch.dict(os.environ, {
        "WEB_UI_ENABLED": "true",
        "WEB_UI_PORT": "9000",
        "WEB_UI_HOST": "127.0.0.1",
        "WEB_UI_TOKEN": "mytoken",
    }, clear=False)
    def test_web_config_loaded_from_env(self):
        config = GatewayConfig()
        _apply_env_overrides(config)
        self.assertIn(Platform.WEB, config.platforms)
        self.assertTrue(config.platforms[Platform.WEB].enabled)
        self.assertEqual(config.platforms[Platform.WEB].extra["port"], 9000)
        self.assertEqual(config.platforms[Platform.WEB].extra["host"], "127.0.0.1")
        self.assertEqual(config.platforms[Platform.WEB].extra["token"], "mytoken")
    @patch.dict(os.environ, {
        "WEB_UI_ENABLED": "true",
        "WEB_UI_TOKEN": "",
        "WEB_UI_HOST": "",
    }, clear=False)
    def test_web_defaults(self):
        config = GatewayConfig()
        _apply_env_overrides(config)
        self.assertIn(Platform.WEB, config.platforms)
        self.assertEqual(config.platforms[Platform.WEB].extra["port"], 8765)
        self.assertEqual(config.platforms[Platform.WEB].extra["host"], "127.0.0.1")
        self.assertEqual(config.platforms[Platform.WEB].extra["token"], "")
    @patch.dict(os.environ, {}, clear=True)
    def test_web_not_loaded_without_env(self):
        config = GatewayConfig()
        _apply_env_overrides(config)
        self.assertNotIn(Platform.WEB, config.platforms)
    @patch.dict(os.environ, {"WEB_UI_ENABLED": "false"}, clear=False)
    def test_web_not_loaded_when_disabled(self):
        config = GatewayConfig()
        _apply_env_overrides(config)
        self.assertNotIn(Platform.WEB, config.platforms)
 # ===========================================================================
 # 3. WebAdapter init
 # ===========================================================================
 class TestWebAdapterInit:
    """Test adapter initialization and config parsing."""
    def _make_adapter(self, **extra):
        from gateway.platforms.web import WebAdapter
        defaults = {"port": 8765, "host": "0.0.0.0", "token": ""}
        defaults.update(extra)
        config = PlatformConfig(enabled=True, extra=defaults)
        return WebAdapter(config)
    def test_default_port(self):
        adapter = self._make_adapter()
        assert adapter._port == 8765
    def test_custom_port(self):
        adapter = self._make_adapter(port=9999)
        assert adapter._port == 9999
    def test_custom_host(self):
        adapter = self._make_adapter(host="127.0.0.1")
        assert adapter._host == "127.0.0.1"
    def test_explicit_token(self):
        adapter = self._make_adapter(token="secret123")
        assert adapter._token == "secret123"
    def test_auto_generated_token(self):
        adapter = self._make_adapter(token="")
        assert len(adapter._token) > 0
        assert adapter._token != ""
    def test_name_property(self):
        adapter = self._make_adapter()
        assert adapter.name == "Web"
 # ===========================================================================
 # 4. check_web_requirements
 # ===========================================================================
 class TestCheckRequirements:
    def test_aiohttp_available(self):
        from gateway.platforms.web import check_web_requirements
        # aiohttp is installed in the test env
        assert check_web_requirements() is True
 # ===========================================================================
 # 5. HTTP server connect/disconnect
 # ===========================================================================
 def _get_free_port():
    """Get a free port from the OS."""
    import socket
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("127.0.0.1", 0))
        return s.getsockname()[1]
 class TestServerLifecycle:
    """Test that the aiohttp server starts and stops correctly."""
    def _make_adapter(self):
        from gateway.platforms.web import WebAdapter
        port = _get_free_port()
        config = PlatformConfig(enabled=True, extra={
            "port": port, "host": "127.0.0.1", "token": "test",
        })
        return WebAdapter(config)
    @pytest.mark.asyncio
    async def test_connect_starts_server(self):
        adapter = self._make_adapter()
        try:
            result = await adapter.connect()
            assert result is True
            assert adapter._runner is not None
        finally:
            await adapter.disconnect()
    @pytest.mark.asyncio
    async def test_disconnect_stops_server(self):
        adapter = self._make_adapter()
        await adapter.connect()
        await adapter.disconnect()
        assert adapter._runner is None or True  # cleanup done
    @pytest.mark.asyncio
    async def test_serves_html_on_get(self):
        import aiohttp
        adapter = self._make_adapter()
        try:
            await adapter.connect()
            port = adapter._port
            async with aiohttp.ClientSession() as session:
                async with session.get(f"http://127.0.0.1:{port}/") as resp:
                    assert resp.status == 200
                    text = await resp.text()
                    assert "Hermes" in text
                    assert "<html" in text.lower()
        finally:
            await adapter.disconnect()
 # ===========================================================================
 # 6. WebSocket auth handshake
 # ===========================================================================
 class TestWebSocketAuth:
    """Test WebSocket authentication flow."""
    def _make_adapter(self):
        from gateway.platforms.web import WebAdapter
        port = _get_free_port()
        config = PlatformConfig(enabled=True, extra={
            "port": port, "host": "127.0.0.1", "token": "correcttoken",
        })
        return WebAdapter(config)
    @pytest.mark.asyncio
    async def test_auth_success(self):
        import aiohttp
        adapter = self._make_adapter()
        try:
            await adapter.connect()
            port = adapter._port
            async with aiohttp.ClientSession() as session:
                async with session.ws_connect(f"http://127.0.0.1:{port}/ws") as ws:
                    await ws.send_json({"type": "auth", "token": "correcttoken"})
                    msg = await asyncio.wait_for(ws.receive_json(), timeout=3)
                    assert msg["type"] == "auth_ok"
                    assert "session_id" in msg
        finally:
            await adapter.disconnect()
    @pytest.mark.asyncio
    async def test_auth_failure(self):
        import aiohttp
        adapter = self._make_adapter()
        try:
            await adapter.connect()
            port = adapter._port
            async with aiohttp.ClientSession() as session:
                async with session.ws_connect(f"http://127.0.0.1:{port}/ws") as ws:
                    await ws.send_json({"type": "auth", "token": "wrongtoken"})
                    msg = await asyncio.wait_for(ws.receive_json(), timeout=3)
                    assert msg["type"] == "auth_fail"
        finally:
            await adapter.disconnect()
 # ===========================================================================
 # 7. WebSocket messaging
 # ===========================================================================
 class TestWebSocketMessaging:
    """Test text message routing through WebSocket."""
    @pytest.mark.asyncio
    async def test_text_message_dispatched_to_handler(self):
        import aiohttp
        from gateway.platforms.web import WebAdapter
        from gateway.platforms.base import MessageEvent
        handler_called = asyncio.Event()
        received_event = {}
        async def mock_handler(event: MessageEvent):
            received_event["text"] = event.text
            received_event["platform"] = event.source.platform
            handler_called.set()
            return "Hello back!"
        port = _get_free_port()
        config = PlatformConfig(enabled=True, extra={
            "port": port, "host": "127.0.0.1", "token": "tok",
        })
        adapter = WebAdapter(config)
        adapter.set_message_handler(mock_handler)
        try:
            await adapter.connect()
            port = adapter._port
            async with aiohttp.ClientSession() as session:
                async with session.ws_connect(f"http://127.0.0.1:{port}/ws") as ws:
                    # Auth first
                    await ws.send_json({"type": "auth", "token": "tok"})
                    auth_msg = await asyncio.wait_for(ws.receive_json(), timeout=3)
                    assert auth_msg["type"] == "auth_ok"
                    # Send text message
                    await ws.send_json({"type": "message", "text": "Hello Hermes"})
                    # Wait for handler to be called
                    await asyncio.wait_for(handler_called.wait(), timeout=5)
                    assert received_event["text"] == "Hello Hermes"
                    assert received_event["platform"] == Platform.WEB
        finally:
            await adapter.disconnect()
 # ===========================================================================
 # 8. send / send_voice / play_tts
 # ===========================================================================
 class TestSendMethods:
    """Test adapter send methods."""
    def _make_adapter(self):
        from gateway.platforms.web import WebAdapter
        config = PlatformConfig(enabled=True, extra={
            "port": 0, "host": "127.0.0.1", "token": "tok",
        })
        adapter = WebAdapter(config)
        adapter._broadcast = AsyncMock()
        return adapter
    @pytest.mark.asyncio
    async def test_send_broadcasts_message(self):
        adapter = self._make_adapter()
        result = await adapter.send(chat_id="web", content="Hello!")
        assert result.success is True
        adapter._broadcast.assert_called_once()
        payload = adapter._broadcast.call_args[0][0]
        assert payload["type"] == "message"
        assert payload["content"] == "Hello!"
    @pytest.mark.asyncio
    async def test_send_voice_broadcasts_voice(self, tmp_path):
        adapter = self._make_adapter()
        # Create a fake audio file
        audio_file = tmp_path / "test.mp3"
        audio_file.write_bytes(b"fake audio data")
        adapter._media_dir = tmp_path / "media"
        adapter._media_dir.mkdir()
        result = await adapter.send_voice(chat_id="web", audio_path=str(audio_file))
        assert result.success is True
        payload = adapter._broadcast.call_args[0][0]
        assert payload["type"] == "voice"
        assert "/media/" in payload["url"]
    @pytest.mark.asyncio
    async def test_play_tts_broadcasts_play_audio(self, tmp_path):
        adapter = self._make_adapter()
        audio_file = tmp_path / "tts.mp3"
        audio_file.write_bytes(b"fake tts data")
        adapter._media_dir = tmp_path / "media"
        adapter._media_dir.mkdir()
        result = await adapter.play_tts(chat_id="web", audio_path=str(audio_file))
        assert result.success is True
        payload = adapter._broadcast.call_args[0][0]
        assert payload["type"] == "play_audio"
        assert "/media/" in payload["url"]
 # ===========================================================================
 # 9. Authorization bypass for Web platform
 # ===========================================================================
 class TestWebAuthorization:
    """Web platform should always be authorized (token-gated at WebSocket level)."""
    def test_web_platform_always_authorized(self):
        from gateway.platforms.base import SessionSource
        source = SessionSource(
            platform=Platform.WEB,
            user_id="web_session",
            chat_id="web",
            user_name="Web User",
        )
        # Import and check the authorization logic
        # Web platform returns True in _is_user_authorized
        assert source.platform == Platform.WEB
 # ===========================================================================
 # 10. Toolset registration
 # ===========================================================================
 class TestToolsetRegistration:
    """Verify hermes-web toolset is defined."""
    def test_hermes_web_toolset_exists(self):
        from toolsets import get_toolset
        ts = get_toolset("hermes-web")
        assert ts is not None
        assert "tools" in ts
    def test_hermes_web_in_gateway_toolset(self):
        from toolsets import get_toolset
        gateway_ts = get_toolset("hermes-gateway")
        assert gateway_ts is not None
        assert "hermes-web" in gateway_ts.get("includes", [])
    def test_hermes_web_has_tts_tool(self):
        from toolsets import get_toolset
        ts = get_toolset("hermes-web")
        tools = ts.get("tools", [])
        assert "text_to_speech" in tools
 # ===========================================================================
 # 11. Transcription Groq fallback
 # ===========================================================================
 class TestTranscriptionGroqFallback:
    """Test that transcription falls back to Groq when OpenAI key is missing."""
    @patch.dict(os.environ, {"GROQ_API_KEY": "gsk_fake"}, clear=True)
    def test_groq_fallback_resolves(self):
        """When only GROQ_API_KEY is set, transcribe_audio should not fail with 'key not set'."""
        from tools.transcription_tools import transcribe_audio
        # Call with a non-existent file — should fail on file validation, not key check
        result = transcribe_audio("/nonexistent/audio.mp3")
        assert result["success"] is False
        assert "not set" not in result.get("error", "")
        assert "not found" in result.get("error", "").lower()
    @patch.dict(os.environ, {}, clear=True)
    def test_no_key_returns_error(self, tmp_path):
        audio_file = tmp_path / "test.ogg"
        audio_file.write_bytes(b"fake audio data")
        from tools.transcription_tools import transcribe_audio
        with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False):
            result = transcribe_audio(str(audio_file))
        assert result["success"] is False
        assert "no stt provider" in result.get("error", "").lower()
 # ===========================================================================
 # 12. LAN IP detection
 # ===========================================================================
 class TestLanIpDetection:
    """Test _get_local_ip returns a valid IP."""
    def test_returns_ip_string(self):
        from gateway.platforms.web import WebAdapter
        config = PlatformConfig(enabled=True, extra={
            "port": 8765, "host": "0.0.0.0", "token": "",
        })
        adapter = WebAdapter(config)
        ip = adapter._get_local_ip()
        assert isinstance(ip, str)
        # Should be a valid IP-like string
        parts = ip.split(".")
        assert len(parts) == 4
    def test_get_local_ips_returns_list(self):
        from gateway.platforms.web import WebAdapter
        config = PlatformConfig(enabled=True, extra={
            "port": 8765, "host": "0.0.0.0", "token": "",
        })
        adapter = WebAdapter(config)
        ips = adapter._get_local_ips()
        assert isinstance(ips, list)
        assert len(ips) >= 1
 # ===========================================================================
 # 13. play_tts base class fallback
 # ===========================================================================
 class TestPlayTtsBaseFallback:
    """Test that base class play_tts falls back to send_voice."""
    @pytest.mark.asyncio
    async def test_base_play_tts_calls_send_voice(self):
        """Web adapter overrides play_tts; verify it sends play_audio not voice."""
        from gateway.platforms.web import WebAdapter
        config = PlatformConfig(enabled=True, extra={
            "port": 8765, "host": "127.0.0.1", "token": "tok",
        })
        adapter = WebAdapter(config)
        adapter._broadcast = AsyncMock()
        adapter._media_dir = Path("/tmp/test_media")
        adapter._media_dir.mkdir(exist_ok=True)
        import tempfile
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
            f.write(b"fake")
            tmp = f.name
        try:
            result = await adapter.play_tts(chat_id="test", audio_path=tmp)
            assert result.success is True
            payload = adapter._broadcast.call_args[0][0]
            assert payload["type"] == "play_audio"
        finally:
            os.unlink(tmp)
 # ===========================================================================
 # 14. Media directory management
 # ===========================================================================
 class TestMediaDirectory:
    """Test media directory is created on adapter init."""

    def test_media_dir_created(self, tmp_path):
        """The adapter's media directory must exist once it is constructed."""
        from gateway.platforms.web import WebAdapter

        config = PlatformConfig(enabled=True, extra={
            "port": 8765, "host": "127.0.0.1", "token": "tok",
        })
        adapter = WebAdapter(config)
        # The previous ``... or True`` made this assertion impossible to fail,
        # so the test verified nothing. Assert the documented contract.
        assert adapter._media_dir.exists()
 # ===========================================================================
 # 15. Security: Path traversal sanitization
 # ===========================================================================
 class TestPathTraversalSanitization:
    """Upload filenames with traversal sequences are sanitized."""
    def test_path_name_strips_traversal(self):
        """Path.name strips directory traversal from filenames."""
        # Sanity check of the stdlib behavior the sanitization relies on.
        assert Path("../../../etc/passwd").name == "passwd"
        assert Path("normal_file.txt").name == "normal_file.txt"
        assert Path("/absolute/path/file.txt").name == "file.txt"
    @pytest.mark.asyncio
    async def test_upload_produces_safe_filename(self):
        """POST a file to /upload on a live adapter and check the stored name and URL.

        NOTE(review): the uploaded filename ("safe_file.txt") contains no
        traversal sequence, so this only covers the benign path; a hostile
        filename is not exercised here.
        """
        import aiohttp
        from gateway.platforms.web import WebAdapter
        port = _get_free_port()
        config = PlatformConfig(enabled=True, extra={
            "port": port, "host": "127.0.0.1", "token": "tok",
        })
        adapter = WebAdapter(config)
        try:
            await adapter.connect()
            async with aiohttp.ClientSession() as session:
                # Multipart form with a single "file" field.
                data = aiohttp.FormData()
                data.add_field("file", b"test content",
                               filename="safe_file.txt",
                               content_type="application/octet-stream")
                async with session.post(
                    f"http://127.0.0.1:{port}/upload",
                    data=data,
                    headers={"Authorization": "Bearer tok"},
                ) as resp:
                    assert resp.status == 200
                    result = await resp.json()
                    # Stored name carries an "upload_" prefix plus the original name.
                    assert result["filename"].startswith("upload_")
                    assert "safe_file.txt" in result["filename"]
                    # File must be inside media dir, not escaped
                    assert result["url"].startswith("/media/")
        finally:
            # Always stop the adapter, even when an assertion above fails.
            await adapter.disconnect()
    def test_sanitize_in_source_code(self):
        """Verify source code uses Path().name for filename sanitization."""
        import inspect
        from gateway.platforms.web import WebAdapter
        # Textual check only: it confirms the tokens appear in the handler's
        # source, not that they are applied to the uploaded filename.
        source = inspect.getsource(WebAdapter._handle_upload)
        assert "Path(" in source and ".name" in source
 # ===========================================================================
 # 16. Security: Media endpoint authentication
 # ===========================================================================
 class TestMediaEndpointAuth:
    """Media files require a valid token query parameter."""

    @staticmethod
    def _make_adapter():
        """Build a WebAdapter on a free localhost port with token "secret".

        Returns:
            An ``(adapter, port)`` tuple; the adapter is not yet connected.
        """
        from gateway.platforms.web import WebAdapter

        port = _get_free_port()
        config = PlatformConfig(enabled=True, extra={
            "port": port, "host": "127.0.0.1", "token": "secret",
        })
        return WebAdapter(config), port

    async def _media_status(self, path_and_query):
        """Start an adapter, GET ``/media/<path_and_query>``, return the HTTP status.

        The adapter is always disconnected afterwards, including on failure.
        """
        import aiohttp

        adapter, port = self._make_adapter()
        try:
            await adapter.connect()
            async with aiohttp.ClientSession() as session:
                async with session.get(
                    f"http://127.0.0.1:{port}/media/{path_and_query}"
                ) as resp:
                    return resp.status
        finally:
            await adapter.disconnect()

    @pytest.mark.asyncio
    async def test_media_without_token_returns_401(self):
        """A request with no token query parameter is rejected."""
        assert await self._media_status("test.txt") == 401

    @pytest.mark.asyncio
    async def test_media_with_wrong_token_returns_401(self):
        """A request with an incorrect token is rejected."""
        assert await self._media_status("test.txt?token=wrong") == 401

    @pytest.mark.asyncio
    async def test_media_with_valid_token_serves_file(self):
        """A request with the correct token returns the file contents."""
        import aiohttp

        adapter, port = self._make_adapter()
        try:
            await adapter.connect()
            # Create a test file in the media directory
            test_file = adapter._media_dir / "testfile.txt"
            test_file.write_text("hello")
            try:
                async with aiohttp.ClientSession() as session:
                    async with session.get(
                        f"http://127.0.0.1:{port}/media/testfile.txt?token=secret"
                    ) as resp:
                        assert resp.status == 200
                        body = await resp.text()
                        assert body == "hello"
            finally:
                # Do not leave the fixture file behind in the adapter's media
                # directory; previously it was never removed.
                test_file.unlink(missing_ok=True)
        finally:
            await adapter.disconnect()

    @pytest.mark.asyncio
    async def test_media_path_traversal_in_url_blocked(self):
        """Percent-encoded ``../`` segments in the media path yield 404."""
        status = await self._media_status("..%2F..%2Fetc%2Fpasswd?token=secret")
        assert status == 404
 # ===========================================================================
 # 17. Security: hmac.compare_digest for token comparison
 # ===========================================================================
 class TestHmacTokenComparison:
    """Source-level check that tokens are compared with hmac.compare_digest."""

    @staticmethod
    def _adapter_source():
        """Return the source text of the WebAdapter class."""
        import inspect
        from gateway.platforms.web import WebAdapter

        return inspect.getsource(WebAdapter)

    def test_no_equality_operator_for_token(self):
        """Neither ``== self._token`` nor ``!= self._token`` may appear."""
        adapter_src = self._adapter_source()
        forbidden = (
            ("== self._token",
             "Token comparison must use hmac.compare_digest, not =="),
            ("!= self._token",
             "Token comparison must use hmac.compare_digest, not !="),
        )
        for needle, message in forbidden:
            assert needle not in adapter_src, message

    def test_hmac_compare_digest_used(self):
        """The adapter source must mention hmac.compare_digest."""
        adapter_src = self._adapter_source()
        assert "hmac.compare_digest" in adapter_src
 # ===========================================================================
 # 18. Security: DOMPurify XSS prevention
 # ===========================================================================
 class TestDomPurifyPresent:
    """The generated chat page must reference DOMPurify for XSS prevention."""

    @staticmethod
    def _chat_page():
        """Return the HTML produced by ``_build_chat_html``."""
        from gateway.platforms.web import _build_chat_html

        return _build_chat_html()

    def test_dompurify_script_included(self):
        """The page mentions DOMPurify and calls ``DOMPurify.sanitize``."""
        page = self._chat_page()
        lowered = page.lower()
        assert "dompurify" in lowered
        assert "DOMPurify.sanitize" in page

    def test_marked_output_sanitized(self):
        """Output of ``marked.parse`` is passed through ``DOMPurify.sanitize``."""
        page = self._chat_page()
        assert "DOMPurify.sanitize(marked.parse(" in page
 # ===========================================================================
 # 19. Security: default bind to localhost
 # ===========================================================================
 class TestDefaultBindLocalhost:
    """The default bind host is 127.0.0.1 rather than 0.0.0.0."""

    def test_adapter_default_host(self):
        """An adapter built with an empty ``extra`` dict binds to 127.0.0.1."""
        from gateway.platforms.web import WebAdapter

        bare_config = PlatformConfig(enabled=True, extra={})
        web_adapter = WebAdapter(bare_config)
        assert web_adapter._host == "127.0.0.1"

    def test_config_default_host(self):
        """With only WEB_UI_ENABLED set, env overrides pick host 127.0.0.1."""
        # clear=True hides every other environment variable for the duration.
        with patch.dict(os.environ, {"WEB_UI_ENABLED": "true"}, clear=True):
            gateway_cfg = GatewayConfig()
            _apply_env_overrides(gateway_cfg)
            web_extra = gateway_cfg.platforms[Platform.WEB].extra
            assert web_extra["host"] == "127.0.0.1"
 # ===========================================================================
 # 20. Security: /remote-control token hiding in group chats
 # ===========================================================================
 class TestRemoteControlTokenHiding:
    """/remote-control reveals the token in DMs and hides it in group chats."""

    def _make_runner(self, tmp_path):
        """Build a GatewayRunner without running ``__init__``; set only the needed attributes."""
        from gateway.run import GatewayRunner

        gw = object.__new__(GatewayRunner)
        gw.adapters = {}
        gw._voice_mode = {}
        gw._VOICE_MODE_PATH = tmp_path / "voice.json"
        gw._session_db = None
        gw.session_store = MagicMock()
        return gw

    def _make_event(self, chat_type="dm"):
        """Create a ``/remote-control`` MessageEvent from the web platform."""
        from gateway.platforms.base import MessageEvent, SessionSource

        origin = SessionSource(
            chat_id="test",
            user_id="user1",
            platform=Platform.WEB,
            chat_type=chat_type,
        )
        evt = MessageEvent(text="/remote-control", source=origin)
        evt.message_id = "msg1"
        return evt

    def _runner_with_web_adapter(self, tmp_path):
        """Return a runner that has a WebAdapter (token "mysecret") registered."""
        from gateway.platforms.web import WebAdapter

        gw = self._make_runner(tmp_path)
        # Simulate a running web adapter
        web_config = PlatformConfig(enabled=True, extra={
            "port": 8765, "host": "127.0.0.1", "token": "mysecret",
        })
        gw.adapters[Platform.WEB] = WebAdapter(web_config)
        return gw

    @pytest.mark.asyncio
    async def test_token_visible_in_dm(self, tmp_path):
        """In a DM the reply contains the token."""
        gw = self._runner_with_web_adapter(tmp_path)
        reply = await gw._handle_remote_control_command(
            self._make_event(chat_type="dm")
        )
        assert "mysecret" in reply

    @pytest.mark.asyncio
    async def test_token_hidden_in_group(self, tmp_path):
        """In a group chat the reply omits the token and says it is hidden."""
        gw = self._runner_with_web_adapter(tmp_path)
        reply = await gw._handle_remote_control_command(
            self._make_event(chat_type="group")
        )
        assert "mysecret" not in reply
        assert "hidden" in reply.lower()
 # ===========================================================================
 # 21. VPN / multi-interface IP detection edge cases
 # ===========================================================================
 class TestVpnAndMultiInterfaceIp:
    """``_get_local_ip`` selection across LAN, VPN, and empty interface lists."""

    @staticmethod
    def _pick_ip(candidates):
        """Return ``WebAdapter._get_local_ip()`` with ``_get_local_ips`` patched to *candidates*."""
        from gateway.platforms.web import WebAdapter

        with unittest.mock.patch.object(
            WebAdapter, "_get_local_ips",
            return_value=candidates,
        ):
            return WebAdapter._get_local_ip()

    @staticmethod
    def _local_ips_source():
        """Return the source text of ``WebAdapter._get_local_ips``."""
        from gateway.platforms.web import WebAdapter
        import inspect

        return inspect.getsource(WebAdapter._get_local_ips)

    def test_lan_preferred_over_vpn(self):
        """192.168.x.x or 10.x.x.x should be chosen over 172.16.x.x VPN."""
        chosen = self._pick_ip(["172.16.0.2", "192.168.1.106"])
        assert chosen == "192.168.1.106"

    def test_ten_network_preferred_over_vpn(self):
        """10.x.x.x corporate LAN should be preferred over 172.16.x.x VPN."""
        chosen = self._pick_ip(["172.16.5.1", "10.0.0.50"])
        assert chosen == "10.0.0.50"

    def test_only_vpn_ip_still_returned(self):
        """If only VPN IP exists, return it rather than nothing."""
        chosen = self._pick_ip(["172.16.0.2"])
        assert chosen == "172.16.0.2"

    def test_no_interfaces_returns_localhost(self):
        """If no IPs found at all, fall back to 127.0.0.1."""
        chosen = self._pick_ip([])
        assert chosen == "127.0.0.1"

    def test_multiple_lan_ips_returns_first_match(self):
        """Multiple LAN IPs: first 192.168/10.x match wins."""
        chosen = self._pick_ip(["172.16.0.2", "192.168.1.50", "10.0.0.1"])
        assert chosen == "192.168.1.50"

    def test_get_local_ips_excludes_loopback(self):
        """_get_local_ips must not return 127.x.x.x addresses."""
        helper_src = self._local_ips_source()
        # Must filter out 127.x addresses
        assert "127." in helper_src, \
            "_get_local_ips must filter loopback addresses"

    def test_get_local_ips_netifaces_fallback(self):
        """When netifaces is unavailable, ifconfig fallback must work."""
        helper_src = self._local_ips_source()
        assert "ifconfig" in helper_src, \
            "_get_local_ips must have ifconfig fallback"
        assert "ImportError" in helper_src, \
            "_get_local_ips must catch netifaces ImportError"
 # ===========================================================================
 # 22. Startup message token exposure
 # ===========================================================================
 class TestStartupTokenExposure:
    """Checks on the token auto-generation flag and its use in ``connect()``."""

    @staticmethod
    def _adapter_with_token(token):
        """Construct a WebAdapter whose configured token is *token*."""
        from gateway.platforms.web import WebAdapter

        web_config = PlatformConfig(enabled=True, extra={
            "port": 8765, "host": "127.0.0.1", "token": token,
        })
        return WebAdapter(web_config)

    @staticmethod
    def _connect_source():
        """Return the source text of ``WebAdapter.connect``."""
        from gateway.platforms.web import WebAdapter
        import inspect

        return inspect.getsource(WebAdapter.connect)

    def test_auto_generated_flag_when_no_token(self):
        """Token auto-generation flag must be set when no token provided."""
        web_adapter = self._adapter_with_token("")
        assert web_adapter._token_auto_generated is True
        assert len(web_adapter._token) == 32  # secrets.token_hex(16) = 32 chars

    def test_configured_flag_when_token_set(self):
        """Token auto-generation flag must be False when token is provided."""
        web_adapter = self._adapter_with_token("mytoken123")
        assert web_adapter._token_auto_generated is False
        assert web_adapter._token == "mytoken123"

    def test_startup_log_hides_configured_token(self):
        """connect() must not print the token value when set via env."""
        connect_src = self._connect_source()
        # Must check _token_auto_generated before printing
        assert "_token_auto_generated" in connect_src, \
            "connect() must check _token_auto_generated before printing token"

    def test_startup_log_shows_auto_token(self):
        """connect() must print the token when auto-generated."""
        connect_src = self._connect_source()
        # Must have a branch that prints the actual token
        assert "auto-generated" in connect_src, \
            "connect() must indicate when token is auto-generated"
--- a/tests/tools/test_transcription.py
+++ b/tests/tools/test_transcription.py
@@ -125,7 +125,7 @@ class TestTranscribeLocal:
        mock_model.transcribe.return_value = ([mock_segment], mock_info)
        with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True), \
-             patch("tools.transcription_tools.WhisperModel", return_value=mock_model), \
+             patch("faster_whisper.WhisperModel", return_value=mock_model), \
             patch("tools.transcription_tools._local_model", None):
            from tools.transcription_tools import _transcribe_local
            result = _transcribe_local(str(audio_file), "base")
@@ -164,7 +164,7 @@ class TestTranscribeOpenAI:
        mock_client.audio.transcriptions.create.return_value = "Hello from OpenAI"
        with patch("tools.transcription_tools._HAS_OPENAI", True), \
-             patch("tools.transcription_tools.OpenAI", return_value=mock_client):
+             patch("openai.OpenAI", return_value=mock_client):
            from tools.transcription_tools import _transcribe_openai
            result = _transcribe_openai(str(audio_file), "whisper-1")
--- a/tests/tools/test_transcription_tools.py
+++ b/tests/tools/test_transcription_tools.py
@@ -160,7 +160,7 @@ class TestTranscribeGroq:
        mock_client.audio.transcriptions.create.return_value = "hello world"
        with patch("tools.transcription_tools._HAS_OPENAI", True), \
-             patch("tools.transcription_tools.OpenAI", return_value=mock_client):
+             patch("openai.OpenAI", return_value=mock_client):
            from tools.transcription_tools import _transcribe_groq
            result = _transcribe_groq(sample_wav, "whisper-large-v3-turbo")
@@ -175,7 +175,7 @@ class TestTranscribeGroq:
        mock_client.audio.transcriptions.create.return_value = "  hello world  \n"
        with patch("tools.transcription_tools._HAS_OPENAI", True), \
-             patch("tools.transcription_tools.OpenAI", return_value=mock_client):
+             patch("openai.OpenAI", return_value=mock_client):
            from tools.transcription_tools import _transcribe_groq
            result = _transcribe_groq(sample_wav, "whisper-large-v3-turbo")
@@ -188,7 +188,7 @@ class TestTranscribeGroq:
        mock_client.audio.transcriptions.create.return_value = "test"
        with patch("tools.transcription_tools._HAS_OPENAI", True), \
-             patch("tools.transcription_tools.OpenAI", return_value=mock_client) as mock_openai_cls:
+             patch("openai.OpenAI", return_value=mock_client) as mock_openai_cls:
            from tools.transcription_tools import _transcribe_groq, GROQ_BASE_URL
            _transcribe_groq(sample_wav, "whisper-large-v3-turbo")
@@ -202,7 +202,7 @@ class TestTranscribeGroq:
        mock_client.audio.transcriptions.create.side_effect = Exception("API error")
        with patch("tools.transcription_tools._HAS_OPENAI", True), \
-             patch("tools.transcription_tools.OpenAI", return_value=mock_client):
+             patch("openai.OpenAI", return_value=mock_client):
            from tools.transcription_tools import _transcribe_groq
            result = _transcribe_groq(sample_wav, "whisper-large-v3-turbo")
@@ -216,7 +216,7 @@ class TestTranscribeGroq:
        mock_client.audio.transcriptions.create.side_effect = PermissionError("denied")
        with patch("tools.transcription_tools._HAS_OPENAI", True), \
-             patch("tools.transcription_tools.OpenAI", return_value=mock_client):
+             patch("openai.OpenAI", return_value=mock_client):
            from tools.transcription_tools import _transcribe_groq
            result = _transcribe_groq(sample_wav, "whisper-large-v3-turbo")
@@ -244,7 +244,7 @@ class TestTranscribeOpenAIExtended:
        mock_client.audio.transcriptions.create.return_value = "test"
        with patch("tools.transcription_tools._HAS_OPENAI", True), \
-             patch("tools.transcription_tools.OpenAI", return_value=mock_client) as mock_openai_cls:
+             patch("openai.OpenAI", return_value=mock_client) as mock_openai_cls:
            from tools.transcription_tools import _transcribe_openai, OPENAI_BASE_URL
            _transcribe_openai(sample_wav, "whisper-1")
@@ -258,7 +258,7 @@ class TestTranscribeOpenAIExtended:
        mock_client.audio.transcriptions.create.return_value = "  hello  \n"
        with patch("tools.transcription_tools._HAS_OPENAI", True), \
-             patch("tools.transcription_tools.OpenAI", return_value=mock_client):
+             patch("openai.OpenAI", return_value=mock_client):
            from tools.transcription_tools import _transcribe_openai
            result = _transcribe_openai(sample_wav, "whisper-1")
@@ -271,7 +271,7 @@ class TestTranscribeOpenAIExtended:
        mock_client.audio.transcriptions.create.side_effect = PermissionError("denied")
        with patch("tools.transcription_tools._HAS_OPENAI", True), \
-             patch("tools.transcription_tools.OpenAI", return_value=mock_client):
+             patch("openai.OpenAI", return_value=mock_client):
            from tools.transcription_tools import _transcribe_openai
            result = _transcribe_openai(sample_wav, "whisper-1")
@@ -300,7 +300,7 @@ class TestTranscribeLocalExtended:
        mock_whisper_cls = MagicMock(return_value=mock_model)
        with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True), \
-             patch("tools.transcription_tools.WhisperModel", mock_whisper_cls), \
+             patch("faster_whisper.WhisperModel", mock_whisper_cls), \
             patch("tools.transcription_tools._local_model", None), \
             patch("tools.transcription_tools._local_model_name", None):
            from tools.transcription_tools import _transcribe_local
@@ -326,7 +326,7 @@ class TestTranscribeLocalExtended:
        mock_whisper_cls = MagicMock(return_value=mock_model)
        with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True), \
-             patch("tools.transcription_tools.WhisperModel", mock_whisper_cls), \
+             patch("faster_whisper.WhisperModel", mock_whisper_cls), \
             patch("tools.transcription_tools._local_model", None), \
             patch("tools.transcription_tools._local_model_name", None):
            from tools.transcription_tools import _transcribe_local
@@ -342,7 +342,7 @@ class TestTranscribeLocalExtended:
        mock_whisper_cls = MagicMock(side_effect=RuntimeError("CUDA out of memory"))
        with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True), \
-             patch("tools.transcription_tools.WhisperModel", mock_whisper_cls), \
+             patch("faster_whisper.WhisperModel", mock_whisper_cls), \
             patch("tools.transcription_tools._local_model", None):
            from tools.transcription_tools import _transcribe_local
            result = _transcribe_local(str(audio), "large-v3")
@@ -366,7 +366,7 @@ class TestTranscribeLocalExtended:
        mock_model.transcribe.return_value = ([seg1, seg2], mock_info)
        with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True), \
-             patch("tools.transcription_tools.WhisperModel", return_value=mock_model), \
+             patch("faster_whisper.WhisperModel", return_value=mock_model), \
             patch("tools.transcription_tools._local_model", None):
            from tools.transcription_tools import _transcribe_local
            result = _transcribe_local(str(audio), "base")
@@ -387,7 +387,7 @@ class TestModelAutoCorrection:
        mock_client.audio.transcriptions.create.return_value = "hello world"
        with patch("tools.transcription_tools._HAS_OPENAI", True), \
-             patch("tools.transcription_tools.OpenAI", return_value=mock_client):
+             patch("openai.OpenAI", return_value=mock_client):
            from tools.transcription_tools import _transcribe_groq, DEFAULT_GROQ_STT_MODEL
            _transcribe_groq(sample_wav, "whisper-1")
@@ -401,7 +401,7 @@ class TestModelAutoCorrection:
        mock_client.audio.transcriptions.create.return_value = "test"
        with patch("tools.transcription_tools._HAS_OPENAI", True), \
-             patch("tools.transcription_tools.OpenAI", return_value=mock_client):
+             patch("openai.OpenAI", return_value=mock_client):
            from tools.transcription_tools import _transcribe_groq, DEFAULT_GROQ_STT_MODEL
            _transcribe_groq(sample_wav, "gpt-4o-transcribe")
@@ -415,7 +415,7 @@ class TestModelAutoCorrection:
        mock_client.audio.transcriptions.create.return_value = "hello world"
        with patch("tools.transcription_tools._HAS_OPENAI", True), \
-             patch("tools.transcription_tools.OpenAI", return_value=mock_client):
+             patch("openai.OpenAI", return_value=mock_client):
            from tools.transcription_tools import _transcribe_openai, DEFAULT_STT_MODEL
            _transcribe_openai(sample_wav, "whisper-large-v3-turbo")
@@ -429,7 +429,7 @@ class TestModelAutoCorrection:
        mock_client.audio.transcriptions.create.return_value = "test"
        with patch("tools.transcription_tools._HAS_OPENAI", True), \
-             patch("tools.transcription_tools.OpenAI", return_value=mock_client):
+             patch("openai.OpenAI", return_value=mock_client):
            from tools.transcription_tools import _transcribe_openai, DEFAULT_STT_MODEL
            _transcribe_openai(sample_wav, "distil-whisper-large-v3-en")
@@ -443,7 +443,7 @@ class TestModelAutoCorrection:
        mock_client.audio.transcriptions.create.return_value = "test"
        with patch("tools.transcription_tools._HAS_OPENAI", True), \
-             patch("tools.transcription_tools.OpenAI", return_value=mock_client):
+             patch("openai.OpenAI", return_value=mock_client):
            from tools.transcription_tools import _transcribe_groq
            _transcribe_groq(sample_wav, "whisper-large-v3")
@@ -457,7 +457,7 @@ class TestModelAutoCorrection:
        mock_client.audio.transcriptions.create.return_value = "test"
        with patch("tools.transcription_tools._HAS_OPENAI", True), \
-             patch("tools.transcription_tools.OpenAI", return_value=mock_client):
+             patch("openai.OpenAI", return_value=mock_client):
            from tools.transcription_tools import _transcribe_openai
            _transcribe_openai(sample_wav, "gpt-4o-mini-transcribe")
@@ -472,7 +472,7 @@ class TestModelAutoCorrection:
        mock_client.audio.transcriptions.create.return_value = "test"
        with patch("tools.transcription_tools._HAS_OPENAI", True), \
-             patch("tools.transcription_tools.OpenAI", return_value=mock_client):
+             patch("openai.OpenAI", return_value=mock_client):
            from tools.transcription_tools import _transcribe_groq
            _transcribe_groq(sample_wav, "my-custom-model")
@@ -486,7 +486,7 @@ class TestModelAutoCorrection:
        mock_client.audio.transcriptions.create.return_value = "test"
        with patch("tools.transcription_tools._HAS_OPENAI", True), \
-             patch("tools.transcription_tools.OpenAI", return_value=mock_client):
+             patch("openai.OpenAI", return_value=mock_client):
            from tools.transcription_tools import _transcribe_openai
            _transcribe_openai(sample_wav, "my-custom-model")
--- a/tests/tools/test_voice_mode.py
+++ b/tests/tools/test_voice_mode.py
@@ -345,6 +345,10 @@ class TestPlayAudioFile:
        np = pytest.importorskip("numpy")
        mock_sd_obj = MagicMock()
        # Simulate stream completing immediately (get_stream().active = False)
        mock_stream = MagicMock()
        mock_stream.active = False
        mock_sd_obj.get_stream.return_value = mock_stream
        def _fake_import():
            return mock_sd_obj, np
@@ -357,7 +361,7 @@ class TestPlayAudioFile:
        assert result is True
        mock_sd_obj.play.assert_called_once()
-        mock_sd_obj.wait.assert_called_once()
+        mock_sd_obj.stop.assert_called_once()
    def test_returns_false_when_no_player(self, monkeypatch, sample_wav):
        def _fail_import():
--- a/tools/transcription_tools.py
+++ b/tools/transcription_tools.py
@@ -34,18 +34,9 @@ logger = logging.getLogger(__name__)
 # Optional imports — graceful degradation
 # ---------------------------------------------------------------------------
-try:
+import importlib.util as _ilu
-    from faster_whisper import WhisperModel
+_HAS_FASTER_WHISPER = _ilu.find_spec("faster_whisper") is not None
-    _HAS_FASTER_WHISPER = True
+_HAS_OPENAI = _ilu.find_spec("openai") is not None
 except ImportError:
    _HAS_FASTER_WHISPER = False
    WhisperModel = None  # type: ignore[assignment,misc]
 try:
    from openai import OpenAI, APIError, APIConnectionError, APITimeoutError
    _HAS_OPENAI = True
 except ImportError:
    _HAS_OPENAI = False
 # ---------------------------------------------------------------------------
 # Constants
@@ -67,7 +58,7 @@ OPENAI_MODELS = {"whisper-1", "gpt-4o-mini-transcribe", "gpt-4o-transcribe"}
 GROQ_MODELS = {"whisper-large-v3", "whisper-large-v3-turbo", "distil-whisper-large-v3-en"}
 # Singleton for the local model — loaded once, reused across calls
-_local_model: Optional["WhisperModel"] = None
+_local_model: Optional[object] = None
 _local_model_name: Optional[str] = None
 # ---------------------------------------------------------------------------
@@ -195,6 +186,7 @@ def _transcribe_local(file_path: str, model_name: str) -> Dict[str, Any]:
        return {"success": False, "transcript": "", "error": "faster-whisper not installed"}
    try:
        from faster_whisper import WhisperModel
        # Lazy-load the model (downloads on first use, ~150 MB for 'base')
        if _local_model is None or _local_model_name != model_name:
            logger.info("Loading faster-whisper model '%s' (first load downloads the model)...", model_name)
@@ -235,6 +227,7 @@ def _transcribe_groq(file_path: str, model_name: str) -> Dict[str, Any]:
        model_name = DEFAULT_GROQ_STT_MODEL
    try:
        from openai import OpenAI, APIError, APIConnectionError, APITimeoutError
        client = OpenAI(api_key=api_key, base_url=GROQ_BASE_URL, timeout=30, max_retries=0)
        with open(file_path, "rb") as audio_file:
@@ -282,6 +275,7 @@ def _transcribe_openai(file_path: str, model_name: str) -> Dict[str, Any]:
        model_name = DEFAULT_STT_MODEL
    try:
        from openai import OpenAI, APIError, APIConnectionError, APITimeoutError
        client = OpenAI(api_key=api_key, base_url=OPENAI_BASE_URL, timeout=30, max_retries=0)
        with open(file_path, "rb") as audio_file:
--- a/tools/voice_mode.py
+++ b/tools/voice_mode.py
@@ -636,7 +636,13 @@ def play_audio_file(file_path: str) -> bool:
                sample_rate = wf.getframerate()
            sd.play(audio_data, samplerate=sample_rate)
-            sd.wait()
+            # sd.wait() calls Event.wait() without timeout — hangs forever if
            # the audio device stalls.  Poll with a ceiling and force-stop.
            duration_secs = len(audio_data) / sample_rate
            deadline = time.monotonic() + duration_secs + 2.0
            while sd.get_stream() and sd.get_stream().active and time.monotonic() < deadline:
                time.sleep(0.01)
            sd.stop()
            return True
        except (ImportError, OSError):
            pass  # audio libs not available, fall through to system players
--- a/toolsets.py
+++ b/toolsets.py
@@ -292,16 +292,10 @@ TOOLSETS = {
        "includes": []
    },
    "hermes-web": {
        "description": "Web UI bot toolset - browser-based chat interface (full access)",
        "tools": _HERMES_CORE_TOOLS,
        "includes": []
    },
    "hermes-gateway": {
        "description": "Gateway toolset - union of all messaging platform tools",
        "tools": [],
-        "includes": ["hermes-telegram", "hermes-discord", "hermes-whatsapp", "hermes-slack", "hermes-signal", "hermes-homeassistant", "hermes-email", "hermes-web"]
+        "includes": ["hermes-telegram", "hermes-discord", "hermes-whatsapp", "hermes-slack", "hermes-signal", "hermes-homeassistant", "hermes-email"]
    }
 }
--- a/website/docs/user-guide/features/voice-mode.md
+++ b/website/docs/user-guide/features/voice-mode.md
@@ -478,10 +478,6 @@ The bot requires an @mention by default in server channels. Make sure you:
 - Edge TTS (free, no key) is the default fallback
 - Check logs for TTS errors
 ### Web UI issues (firewall, mobile mic)
 See the [Web UI Troubleshooting](../messaging/web.md#troubleshooting) guide for firewall, HTTPS, and mobile microphone issues.
 ### Whisper returns garbage text
 The hallucination filter catches most cases automatically. If you're still getting phantom transcripts:
--- a/website/docs/user-guide/messaging/index.md
+++ b/website/docs/user-guide/messaging/index.md
@@ -15,12 +15,12 @@ Chat with Hermes from Telegram, Discord, Slack, WhatsApp, Signal, Email, Home As
 │                              Hermes Gateway                                   │
 ├───────────────────────────────────────────────────────────────────────────────┤
 │                                                                               │
-│  ┌──────────┐ ┌─────────┐ ┌──────────┐ ┌───────┐ ┌───────┐ ┌───────┐ ┌────┐ ┌─────┐│
+│  ┌──────────┐ ┌─────────┐ ┌──────────┐ ┌───────┐ ┌───────┐ ┌───────┐ ┌────┐       │
-│  │ Telegram │ │ Discord │ │ WhatsApp │ │ Slack │ │Signal │ │ Email │ │ HA │ │ Web ││
+│  │ Telegram │ │ Discord │ │ WhatsApp │ │ Slack │ │Signal │ │ Email │ │ HA │       │
-│  │ Adapter  │ │ Adapter │ │ Adapter  │ │Adapter│ │Adapter│ │Adapter│ │Adpt│ │Adpt ││
+│  │ Adapter  │ │ Adapter │ │ Adapter  │ │Adapter│ │Adapter│ │Adapter│ │Adpt│       │
-│  └────┬─────┘ └────┬────┘ └────┬─────┘ └──┬────┘ └──┬────┘ └──┬────┘ └─┬──┘ └──┬──┘│
+│  └────┬─────┘ └────┬────┘ └────┬─────┘ └──┬────┘ └──┬────┘ └──┬────┘ └─┬──┘       │
-│       │             │           │           │         │         │        │       │   │
+│       │             │           │           │         │         │        │           │
-│       └─────────────┴───────────┴───────────┴─────────┴─────────┴────────┴───────┘   │
+│       └─────────────┴───────────┴───────────┴─────────┴─────────┴────────┘           │
 │                                     │                                                │
 │                            ┌────────▼────────┐                                       │
 │                            │  Session Store  │                                       │
@@ -81,7 +81,6 @@ hermes gateway status       # Check service status
 | `/background <prompt>` | Run a prompt in a separate background session |
 | `/reload-mcp` | Reload MCP servers from config |
 | `/update` | Update Hermes Agent to the latest version |
 | `/remote-control [port] [token]` | Start web UI for remote access |
 | `/help` | Show available commands |
 | `/<skill-name>` | Invoke any installed skill |
@@ -221,4 +220,3 @@ Each platform has its own toolset:
 - [Signal Setup](signal.md)
 - [Email Setup](email.md)
 - [Home Assistant Integration](homeassistant.md)
 - [Web UI Setup](web.md)
--- a/website/docs/user-guide/messaging/web.md
+++ b/website/docs/user-guide/messaging/web.md
@@ -1,206 +0,0 @@
 ---
 sidebar_position: 8
 title: "Web UI"
 description: "Access Hermes from any browser on your network — phone, tablet, or desktop"
 ---
 # Web UI Setup
 Access Hermes from any browser on your local network. Open the URL on your phone, tablet, or another computer — no app install, no third-party account needed.
 :::info No External Dependencies
 The Web adapter uses `aiohttp`, which is already included in the `[messaging]` extra. No additional packages or external services are required.
 :::
 ## Overview
 | Component | Value |
 |-----------|-------|
 | **Library** | `aiohttp` (HTTP + WebSocket) |
 | **Connection** | Local network (LAN) |
 | **Auth** | Token-based (auto-generated or custom) |
 | **Features** | Markdown, code highlighting, voice messages, images, mobile responsive |
 ---
 ## Quick Start
 ### Option 1: On-Demand via Command
 Start the gateway normally, then type from any connected platform (Telegram, Discord, etc.):
 ```
 /remote-control
 ```
 The bot replies with the URL and access token. Open the URL on your phone.
 You can also specify a custom port and token:
 ```
 /remote-control 9000 mysecrettoken
 ```
 ### Option 2: Auto-Start with Gateway
 Add to `~/.hermes/.env`:
 ```bash
 WEB_UI_ENABLED=true
 WEB_UI_PORT=8765          # default: 8765
 WEB_UI_TOKEN=mytoken      # auto-generated if empty
 ```
 Start the gateway:
 ```bash
 hermes gateway
 ```
 The web UI starts automatically alongside your other platforms.
 ---
 ## Features
 ### Markdown & Code Highlighting
 Bot responses render full GitHub-flavored Markdown with syntax-highlighted code blocks powered by highlight.js.
 ### Voice Conversation
 Click the microphone button to record a voice message. The audio is transcribed via Whisper STT (OpenAI, with Groq as the fallback) and sent to the agent. The bot automatically replies with audio playback — voice first, then the text response appears. No extra configuration needed.
 STT uses `VOICE_TOOLS_OPENAI_KEY` (OpenAI Whisper) if set, otherwise falls back to `GROQ_API_KEY` (Groq Whisper, free tier). If you only need STT, setting `GROQ_API_KEY` is the simplest option. TTS uses Edge TTS (free, no key) by default, or ElevenLabs/OpenAI if configured in `~/.hermes/config.yaml`.
 ### Images & Files
 - Images display inline in the chat
 - Documents show as download links
 - Generated images from the agent appear automatically
 ### Mobile Responsive
 The UI adapts to phone screens — full chat experience with touch-friendly input and buttons.
 ### Typing Indicator
 Shows an animated indicator while the agent is processing your message.
 ### Auto-Reconnect
 If the connection drops (server restart, network change), the client automatically reconnects with exponential backoff.
 ---
 ## Firewall & Network
 ### macOS Firewall
 macOS may block incoming connections by default. If devices on your network can't connect:
 1. **System Settings** > **Network** > **Firewall**
 2. Either disable the firewall temporarily, or add Python to the allowed apps
 ### Localhost Only
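 To restrict access to the local machine only, keep the default bind address (set `WEB_UI_HOST=0.0.0.0` instead to let other devices on the LAN connect):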
 ```bash
 WEB_UI_HOST=127.0.0.1
 ```
 ### Remote Access (Outside LAN)
 The Web UI is designed for local network access. For access from outside your network, use a tunnel:
 ```bash
 # Using ngrok
 ngrok http 8765
 # Using Cloudflare Tunnel
 cloudflared tunnel --url http://localhost:8765
 # Using Tailscale (recommended — encrypted, no port forwarding)
 # Install Tailscale on both devices, then access via Tailscale IP
 ```
 ---
 ## Security
 - **Token authentication** — every WebSocket connection must authenticate with the correct token before sending messages
 - **No data leaves your network via the Web UI itself** — the server runs locally, chat data stays on your machine (voice features still send audio to the configured STT/TTS provider)
 - **No HTTPS by default** — traffic is unencrypted on the LAN. Use a reverse proxy or tunnel for encryption
 - **File uploads** require the auth token in the `Authorization` header
 - **Media cleanup** — uploaded and generated files are automatically deleted after 24 hours
 ---
 ## Environment Variables
 | Variable | Default | Description |
 |----------|---------|-------------|
 | `WEB_UI_ENABLED` | `false` | Enable the web gateway |
 | `WEB_UI_PORT` | `8765` | HTTP server port |
 | `WEB_UI_HOST` | `127.0.0.1` | Bind address (`0.0.0.0` = LAN, `127.0.0.1` = localhost) |
 | `WEB_UI_TOKEN` | (auto) | Access token. Auto-generated if empty. |
 ---
 ## Troubleshooting
 ### "Server not found" on phone
 - Verify both devices are on the **same WiFi network**
 - Check if macOS firewall is blocking (see Firewall section above)
 - Try the IP address shown in console output, not `localhost`
 - If using VPN, the console shows all available IPs — try each one
 ### Port already in use
 Change the port in `.env`:
 ```bash
 WEB_UI_PORT=9000
 ```
 ### Voice recording not working
 - Browser must support `MediaRecorder` API (Chrome, Firefox, Safari 14.5+)
 - HTTPS is required for microphone access on non-localhost origins
 - On localhost (`127.0.0.1`), HTTP works fine for microphone
 ### Microphone not working on mobile
 Mobile browsers require **HTTPS** for microphone access (`navigator.mediaDevices` API). When accessing the Web UI over HTTP on a LAN IP (e.g. `http://192.168.1.x:8765`), the mic button will appear dimmed.
 **Android Chrome** — flag the LAN IP as secure:
 1. Open `chrome://flags/#unsafely-treat-insecure-origin-as-secure`
 2. Add your Web UI URL (e.g. `http://192.168.1.106:8765`)
 3. Set to **Enabled** and relaunch Chrome
 **iOS Safari / Chrome** — no flag bypass available. Use one of these instead:
 1. **Self-signed HTTPS** with mkcert (recommended):
   ```bash
   brew install mkcert && mkcert -install
   mkcert 192.168.1.106
   npx local-ssl-proxy --source 8443 --target 8765 \
     --cert 192.168.1.106.pem --key 192.168.1.106-key.pem
   ```
   Then access `https://192.168.1.106:8443`. Trust the mkcert root CA on iOS: **Settings > General > About > Certificate Trust Settings**.
 2. **SSH tunnel from mobile** (if you have Termius or similar):
   ```bash
   ssh -L 8765:127.0.0.1:8765 user@your-mac-ip
   ```
   Then access `http://localhost:8765` — localhost is exempt from the HTTPS requirement.
 :::tip
 Text chat works on mobile over HTTP without any workaround — only the microphone feature requires HTTPS.
 :::
 ### CDN resources not loading
 The UI loads `marked.js` and `highlight.js` from CDN. If you're offline or behind a restrictive proxy, markdown rendering and code highlighting won't work but basic chat still functions.