mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 15:01:34 +08:00
294 lines
9.5 KiB
Python
294 lines
9.5 KiB
Python
|
|
"""Tests for plugins.google_meet.realtime.openai_client (v2).

Uses a scripted fake WebSocket — no network, no API key required.
"""
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import base64
|
||
|
|
import json
|
||
|
|
import sys
|
||
|
|
import threading
|
||
|
|
import types
|
||
|
|
from pathlib import Path
|
||
|
|
from unittest.mock import patch
|
||
|
|
|
||
|
|
import pytest
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.fixture(autouse=True)
def _isolate_home(tmp_path, monkeypatch):
    """Create ``<tmp_path>/.hermes``, export it as ``HERMES_HOME`` and yield it."""
    sandbox = tmp_path.joinpath(".hermes")
    sandbox.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(sandbox))
    yield sandbox
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------------
# Fake WebSocket
# ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
|
||
|
|
class _FakeWS:
    """In-memory stand-in for a synchronous WebSocket connection.

    Outgoing frames are JSON-decoded and collected in ``sent``; incoming
    frames are served in order from the list supplied at construction time.
    """

    def __init__(self, recv_frames: list):
        self.closed = False
        # Copy the script so draining it never mutates the caller's list.
        self._recv_q: list = [*recv_frames]
        self.sent: list[dict] = []

    def send(self, payload):
        """Parse *payload* (JSON as ``str`` or bytes-like) and record it."""
        text = payload.decode() if isinstance(payload, (bytes, bytearray)) else payload
        self.sent.append(json.loads(text))

    def recv(self, timeout=None):  # noqa: ARG002
        """Return the next scripted frame, JSON-encoding it when it is a dict.

        Raises:
            RuntimeError: when every scripted frame has already been consumed.
        """
        if self._recv_q:
            frame = self._recv_q.pop(0)
            return json.dumps(frame) if isinstance(frame, dict) else frame
        raise RuntimeError("fake ws: no more frames")

    def close(self):
        """Flag the connection as closed."""
        self.closed = True
|
||
|
|
|
||
|
|
|
||
|
|
def _install_fake_websockets(monkeypatch, fake_ws):
    """Register stub ``websockets`` modules whose ``connect`` returns *fake_ws*.

    Returns a dict that the stub ``connect`` fills in on every call with the
    ``url``, the raw ``kwargs`` and the ``headers`` (taken from
    ``additional_headers`` or, when that is missing/falsy, ``extra_headers``).
    """
    captured = {"url": None, "headers": None, "kwargs": None}

    def _connect(url, **kwargs):
        header_arg = kwargs.get("additional_headers") or kwargs.get("extra_headers")
        captured.update(url=url, kwargs=kwargs, headers=header_arg)
        return fake_ws

    # Build the package chain leaf-first so each parent can expose its child.
    client_mod = types.ModuleType("websockets.sync.client")
    client_mod.connect = _connect
    sync_mod = types.ModuleType("websockets.sync")
    sync_mod.client = client_mod
    root_mod = types.ModuleType("websockets")
    root_mod.sync = sync_mod

    for dotted_name, module in (
        ("websockets", root_mod),
        ("websockets.sync", sync_mod),
        ("websockets.sync.client", client_mod),
    ):
        monkeypatch.setitem(sys.modules, dotted_name, module)
    return captured
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------------
# connect()
# ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
|
||
|
|
def test_connect_sends_session_update_with_voice_and_instructions(monkeypatch):
    """connect() dials the realtime URL with auth headers, then sends session.update.

    The single frame sent must carry the configured voice, instructions,
    both modalities and PCM16 input/output audio formats.
    """
    from plugins.google_meet.realtime.openai_client import RealtimeSession

    fake = _FakeWS(recv_frames=[])
    seen = _install_fake_websockets(monkeypatch, fake)

    session = RealtimeSession(
        api_key="sk-test",
        model="gpt-realtime",
        voice="verse",
        instructions="Be brief.",
    )
    session.connect()

    # Endpoint URL carries the model as a query parameter.
    url = seen["url"]
    assert url.startswith("wss://api.openai.com/v1/realtime")
    assert "model=gpt-realtime" in url

    # Auth + beta headers set (dict() accepts a mapping or a list of pairs).
    header_map = dict(seen["headers"] or [])
    assert header_map.get("Authorization") == "Bearer sk-test"
    assert header_map.get("OpenAI-Beta") == "realtime=v1"

    # The only frame sent so far must be session.update with the right shape.
    assert len(fake.sent) == 1
    frame = fake.sent[0]
    assert frame["type"] == "session.update"
    session_cfg = frame["session"]
    for field, expected in (
        ("voice", "verse"),
        ("instructions", "Be brief."),
    ):
        assert session_cfg[field] == expected
    assert set(session_cfg["modalities"]) == {"audio", "text"}
    for field in ("output_audio_format", "input_audio_format"):
        assert session_cfg[field] == "pcm16"
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------------
# speak()
# ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
|
||
|
|
def test_speak_sends_create_and_response_and_writes_audio(monkeypatch, tmp_path):
    """speak() sends item.create + response.create and writes decoded audio.

    Two scripted ``response.audio.delta`` frames are expected to end up,
    base64-decoded and concatenated in order, in the audio sink file.
    """
    from plugins.google_meet.realtime.openai_client import RealtimeSession

    chunks = [b"\x01\x02\x03\x04PCM!", b"more"]
    expected_audio = b"".join(chunks)

    script = [{"type": "response.created"}]
    script += [
        {"type": "response.audio.delta", "delta": base64.b64encode(chunk).decode()}
        for chunk in chunks
    ]
    script.append({"type": "response.done"})

    fake = _FakeWS(recv_frames=script)
    _install_fake_websockets(monkeypatch, fake)

    sink = tmp_path / "out.pcm"
    session = RealtimeSession(api_key="sk-test", audio_sink_path=sink)
    session.connect()
    result = session.speak("Hello everyone.")

    # Frames sent after session.update: conversation.item.create then response.create.
    assert [frame["type"] for frame in fake.sent] == [
        "session.update",
        "conversation.item.create",
        "response.create",
    ]
    _, create_frame, response_frame = fake.sent

    item = create_frame["item"]
    assert item["role"] == "user"
    first_part = item["content"][0]
    assert first_part["type"] == "input_text"
    assert first_part["text"] == "Hello everyone."

    assert response_frame["response"]["modalities"] == ["audio"]

    # Audio file got decoded + appended bytes.
    assert sink.read_bytes() == expected_audio
    assert result["ok"] is True
    assert result["bytes_written"] == len(expected_audio)
    assert result["duration_ms"] >= 0.0
|
||
|
|
|
||
|
|
|
||
|
|
def test_speak_raises_on_error_frame(monkeypatch, tmp_path):
    """An ``error`` frame during speak() must surface as RuntimeError with its message."""
    from plugins.google_meet.realtime.openai_client import RealtimeSession

    script = [
        {"type": "response.created"},
        {"type": "error", "error": {"message": "bad juju"}},
    ]
    fake = _FakeWS(recv_frames=script)
    _install_fake_websockets(monkeypatch, fake)

    sink = tmp_path / "o.pcm"
    session = RealtimeSession(api_key="sk-test", audio_sink_path=sink)
    session.connect()

    with pytest.raises(RuntimeError, match="bad juju"):
        session.speak("hi")
|
||
|
|
|
||
|
|
|
||
|
|
def test_speak_without_connect_raises(monkeypatch):
    """speak() before connect() must raise a RuntimeError that mentions 'connect'."""
    from plugins.google_meet.realtime.openai_client import RealtimeSession

    unconnected = RealtimeSession(api_key="sk-test")

    with pytest.raises(RuntimeError, match="connect"):
        unconnected.speak("hi")
|
||
|
|
|
||
|
|
|
||
|
|
def test_close_is_idempotent_and_closes_ws(monkeypatch):
    """close() closes the underlying socket and may safely be called twice."""
    from plugins.google_meet.realtime.openai_client import RealtimeSession

    fake = _FakeWS(recv_frames=[])
    _install_fake_websockets(monkeypatch, fake)

    session = RealtimeSession(api_key="sk-test")
    session.connect()

    session.close()
    assert fake.closed is True

    # Second close is a no-op: it only has to return without raising.
    session.close()
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------------
# websockets dependency missing
# ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
|
||
|
|
def test_connect_raises_clean_error_when_websockets_missing(monkeypatch):
    """connect() must raise a RuntimeError with an install hint when websockets is absent."""
    from plugins.google_meet.realtime.openai_client import RealtimeSession

    # Make `import websockets.sync.client` fail: a ``None`` entry in
    # sys.modules causes the import of that name to raise.
    for blocked in ("websockets", "websockets.sync", "websockets.sync.client"):
        monkeypatch.setitem(sys.modules, blocked, None)

    session = RealtimeSession(api_key="sk-test")

    with pytest.raises(RuntimeError, match="pip install websockets"):
        session.connect()
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------------
# RealtimeSpeaker
# ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
|
||
|
|
class _StubSession:
    """Session double that records every text handed to ``speak``."""

    def __init__(self):
        self.spoken: list[str] = []

    def speak(self, text, timeout=30.0):  # noqa: ARG002
        """Remember *text* and return a canned success result.

        ``bytes_written`` is simply ``len(text)``; *timeout* is ignored.
        """
        self.spoken += [text]
        return dict(ok=True, bytes_written=len(text), duration_ms=1.0)
|
||
|
|
|
||
|
|
|
||
|
|
def test_speaker_run_until_stopped_processes_queue(tmp_path):
    """The speaker speaks each queued line in order, logs it, and drains the queue."""
    from plugins.google_meet.realtime.openai_client import RealtimeSpeaker

    queue = tmp_path / "queue.jsonl"
    processed = tmp_path / "processed.jsonl"
    entries = [
        {"id": "a", "text": "hello one"},
        {"id": "b", "text": "hello two"},
    ]
    queue.write_text("".join(json.dumps(entry) + "\n" for entry in entries))

    stub = _StubSession()
    speaker = RealtimeSpeaker(stub, queue_path=queue, processed_path=processed)

    def _queue_drained():
        # Stop once the queue file exists and holds nothing but whitespace.
        return queue.exists() and queue.read_text().strip() == ""

    speaker.run_until_stopped(_queue_drained, poll_interval=0.01)

    assert stub.spoken == ["hello one", "hello two"]

    # Processed file has both entries, in order.
    records = [
        json.loads(raw)
        for raw in processed.read_text().splitlines()
        if raw.strip()
    ]
    assert [record["id"] for record in records] == ["a", "b"]
    assert all(record["result"]["ok"] for record in records)

    # Queue is empty (possibly empty string) after processing.
    assert queue.read_text().strip() == ""
|
||
|
|
|
||
|
|
|
||
|
|
def test_speaker_exits_immediately_when_stop_fn_true(tmp_path):
    """With a stop function that is already true, nothing in the queue is spoken."""
    from plugins.google_meet.realtime.openai_client import RealtimeSpeaker

    queue = tmp_path / "q.jsonl"
    pending = {"id": "x", "text": "never spoken"}
    queue.write_text(json.dumps(pending) + "\n")

    stub = _StubSession()
    speaker = RealtimeSpeaker(stub, queue_path=queue)

    def _always_stop():
        return True

    speaker.run_until_stopped(_always_stop, poll_interval=0.01)

    assert stub.spoken == []
|
||
|
|
|
||
|
|
|
||
|
|
def test_speaker_drops_line_without_processed_path_when_none(tmp_path):
    """With ``processed_path=None`` a queued line is spoken once and removed."""
    from plugins.google_meet.realtime.openai_client import RealtimeSpeaker

    queue = tmp_path / "q.jsonl"
    only_entry = {"id": "only", "text": "once"}
    queue.write_text(json.dumps(only_entry) + "\n")

    stub = _StubSession()
    speaker = RealtimeSpeaker(stub, queue_path=queue, processed_path=None)

    def _queue_drained():
        # Stop as soon as the queue file holds nothing but whitespace.
        return queue.read_text().strip() == ""

    speaker.run_until_stopped(_queue_drained, poll_interval=0.01)

    assert stub.spoken == ["once"]
    assert queue.read_text().strip() == ""
|