Files
hermes-agent/tests/gateway/test_discord_race_polish.py

80 lines
2.5 KiB
Python
Raw Normal View History

"""Discord adapter race polish: concurrent join_voice_channel must not
double-invoke channel.connect() on the same guild."""
fix(discord): close two low-severity adapter races (#12558) Two small races in gateway/platforms/discord.py, bundled together since they're adjacent in the adapter and both narrow in impact. 1. on_message vs _resolve_allowed_usernames (startup window) DISCORD_ALLOWED_USERS accepts both numeric IDs and raw usernames. At connect-time, _resolve_allowed_usernames walks the bot's guilds (fetch_members can take multiple seconds) to swap usernames for IDs. on_message can fire during that window; _is_allowed_user compares the numeric author.id against a set that may still contain raw usernames — legitimate users get silently rejected for a few seconds after every reconnect. Fix: on_message awaits _ready_event (with a 30s timeout) when it isn't already set. on_ready sets the event after the resolve completes. In steady state this is a no-op (event already set); only the startup / reconnect window ever blocks. 2. join_voice_channel check-and-connect The existing-connection check at _voice_clients.get() and the channel.connect() call straddled an await boundary with no lock. Two concurrent /voice channel invocations could both see None and both call connect(); discord.py raises ClientException ("Already connected") on the loser. Same race class for leave running concurrently with _voice_timeout_handler. Fix: per-guild asyncio.Lock (_voice_locks dict with lazy alloc via _voice_lock_for). join_voice_channel and leave_voice_channel both run their body under the lock. Sequential within a guild, still fully concurrent across guilds. Both: LOW severity. The first only affects username-based allowlists on fast-follow-up messages at startup; the second is a narrow exception on simultaneous voice commands. Bundled so the adapter gets a single coherent polish pass. Tests (tests/gateway/test_discord_race_polish.py): 2 regression cases. - test_concurrent_joins_do_not_double_connect: two concurrent join_voice_channel calls on the same guild result in exactly one channel.connect() invocation. - test_on_message_blocks_until_ready_event_set: asserts the expected wait pattern is present in on_message (source inspection, since full discord.py client setup isn't practical here). Regression-guard validated: against unpatched gateway/platforms/discord.py both tests fail. With the fix they pass. Full Discord suite (118 tests) green.
2026-04-19 05:45:59 -07:00
import asyncio
from unittest.mock import MagicMock, patch
fix(discord): close two low-severity adapter races (#12558) Two small races in gateway/platforms/discord.py, bundled together since they're adjacent in the adapter and both narrow in impact. 1. on_message vs _resolve_allowed_usernames (startup window) DISCORD_ALLOWED_USERS accepts both numeric IDs and raw usernames. At connect-time, _resolve_allowed_usernames walks the bot's guilds (fetch_members can take multiple seconds) to swap usernames for IDs. on_message can fire during that window; _is_allowed_user compares the numeric author.id against a set that may still contain raw usernames — legitimate users get silently rejected for a few seconds after every reconnect. Fix: on_message awaits _ready_event (with a 30s timeout) when it isn't already set. on_ready sets the event after the resolve completes. In steady state this is a no-op (event already set); only the startup / reconnect window ever blocks. 2. join_voice_channel check-and-connect The existing-connection check at _voice_clients.get() and the channel.connect() call straddled an await boundary with no lock. Two concurrent /voice channel invocations could both see None and both call connect(); discord.py raises ClientException ("Already connected") on the loser. Same race class for leave running concurrently with _voice_timeout_handler. Fix: per-guild asyncio.Lock (_voice_locks dict with lazy alloc via _voice_lock_for). join_voice_channel and leave_voice_channel both run their body under the lock. Sequential within a guild, still fully concurrent across guilds. Both: LOW severity. The first only affects username-based allowlists on fast-follow-up messages at startup; the second is a narrow exception on simultaneous voice commands. Bundled so the adapter gets a single coherent polish pass. Tests (tests/gateway/test_discord_race_polish.py): 2 regression cases. - test_concurrent_joins_do_not_double_connect: two concurrent join_voice_channel calls on the same guild result in exactly one channel.connect() invocation. - test_on_message_blocks_until_ready_event_set: asserts the expected wait pattern is present in on_message (source inspection, since full discord.py client setup isn't practical here). Regression-guard validated: against unpatched gateway/platforms/discord.py both tests fail. With the fix they pass. Full Discord suite (118 tests) green.
2026-04-19 05:45:59 -07:00
import pytest
from gateway.config import Platform, PlatformConfig
def _make_adapter():
from gateway.platforms.discord import DiscordAdapter
adapter = object.__new__(DiscordAdapter)
adapter._platform = Platform.DISCORD
adapter.config = PlatformConfig(enabled=True, token="t")
adapter._ready_event = asyncio.Event()
adapter._allowed_user_ids = set()
adapter._allowed_role_ids = set()
adapter._voice_clients = {}
adapter._voice_locks = {}
adapter._voice_receivers = {}
adapter._voice_listen_tasks = {}
adapter._voice_timeout_tasks = {}
adapter._voice_text_channels = {}
adapter._voice_sources = {}
adapter._client = MagicMock()
return adapter
@pytest.mark.asyncio
async def test_concurrent_joins_do_not_double_connect():
"""Two concurrent join_voice_channel calls on the same guild must
serialize through the per-guild lock only ONE channel.connect()
actually fires; the second sees the _voice_clients entry the first
just installed."""
adapter = _make_adapter()
connect_count = [0]
release = asyncio.Event()
class FakeVC:
def __init__(self, channel):
self.channel = channel
def is_connected(self):
return True
async def move_to(self, _channel):
return None
async def slow_connect(self):
connect_count[0] += 1
await release.wait()
return FakeVC(self)
channel = MagicMock()
channel.id = 111
channel.guild.id = 42
channel.connect = lambda: slow_connect(channel)
from gateway.platforms import discord as discord_mod
with patch.object(discord_mod, "VoiceReceiver",
MagicMock(return_value=MagicMock(start=lambda: None))):
with patch.object(discord_mod.asyncio, "ensure_future",
lambda _c: asyncio.create_task(asyncio.sleep(0))):
t1 = asyncio.create_task(adapter.join_voice_channel(channel))
t2 = asyncio.create_task(adapter.join_voice_channel(channel))
await asyncio.sleep(0.05)
release.set()
r1, r2 = await asyncio.gather(t1, t2)
assert connect_count[0] == 1, (
f"expected 1 channel.connect() call, got {connect_count[0]}"
"per-guild lock is not serializing join_voice_channel"
)
assert r1 is True and r2 is True
assert 42 in adapter._voice_clients