Compare commits

...

1 Commits

Author SHA1 Message Date
LucidPaths
0137296564 fix: remove stale test skips, fix regex backtracking, file search bug, and test flakiness
Bug fixes:
- agent/redact.py: catastrophic regex backtracking in _ENV_ASSIGN_RE — removed
  re.IGNORECASE and changed [A-Z_]* to [A-Z0-9_]* to restrict matching to actual
  env var name chars. Without this, the pattern backtracks exponentially on large
  strings (e.g. 100K tool output), causing test_file_read_guards to time out.
- tools/file_operations.py: over-escaped newline in find -printf format string
  produced literal backslash-n instead of a real newline, breaking file search
  result parsing (total_count always 1, paths concatenated).

Test fixes:
- Remove stale pytestmark.skip from 4 test modules that were blanket-skipped as
  'Hangs in non-interactive environments' but actually run fine:
  - test_413_compression.py (12 tests, 25s)
  - test_file_tools_live.py (71 tests, 24s)
  - test_code_execution.py (61 tests, 99s)
  - test_agent_loop_tool_calling.py (has proper OPENROUTER_API_KEY skip already)
- test_413_compression.py: fix threshold values in 2 preflight compression tests
  where context_length was too small for the compressed output to fit in one pass.
- test_mcp_probe.py: add missing _MCP_AVAILABLE mock so tests work without MCP SDK.
- test_mcp_tool_issue_948.py: inject MCP symbols (StdioServerParameters etc.) when
  SDK is not installed so patch() targets exist.
- test_approve_deny_commands.py: replace time.sleep(0.3) with deterministic polling
  of _gateway_queues — fixes race condition where resolve fires before threads
  register their approval entries, causing the test to hang indefinitely.

Net effect: +256 tests recovered from skip, 8 real failures fixed.
2026-04-03 22:43:45 -07:00
9 changed files with 45 additions and 20 deletions

View File

@@ -53,8 +53,7 @@ _PREFIX_PATTERNS = [
# ENV assignment patterns: KEY=value where KEY contains a secret-like name # ENV assignment patterns: KEY=value where KEY contains a secret-like name
_SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)" _SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)"
_ENV_ASSIGN_RE = re.compile( _ENV_ASSIGN_RE = re.compile(
rf"([A-Z_]{{0,50}}{_SECRET_ENV_NAMES}[A-Z_]{{0,50}})\s*=\s*(['\"]?)(\S+)\2", rf"([A-Z0-9_]{{0,50}}{_SECRET_ENV_NAMES}[A-Z0-9_]{{0,50}})\s*=\s*(['\"]?)(\S+)\2",
re.IGNORECASE,
) )
# JSON field patterns: "apiKey": "value", "token": "value", etc. # JSON field patterns: "apiKey": "value", "token": "value", etc.

View File

@@ -591,7 +591,16 @@ class TestBlockingApprovalE2E:
] ]
for t in threads: for t in threads:
t.start() t.start()
time.sleep(0.3)
# Wait for both threads to register pending approvals instead of
# relying on a fixed sleep. The approval module stores entries in
# _gateway_queues[session_key] — poll until we see 2 entries.
from tools.approval import _gateway_queues
deadline = time.monotonic() + 5
while time.monotonic() < deadline:
if len(_gateway_queues.get(session_key, [])) >= 2:
break
time.sleep(0.05)
# Approve first, deny second # Approve first, deny second
resolve_gateway_approval(session_key, "once") # oldest resolve_gateway_approval(session_key, "once") # oldest

View File

@@ -7,7 +7,7 @@ Verifies that:
""" """
import pytest import pytest
pytestmark = pytest.mark.skip(reason="Hangs in non-interactive environments") #pytestmark = pytest.mark.skip(reason="Hangs in non-interactive environments")
@@ -318,12 +318,13 @@ class TestPreflightCompression:
def test_preflight_compresses_oversized_history(self, agent): def test_preflight_compresses_oversized_history(self, agent):
"""When loaded history exceeds the model's context threshold, compress before API call.""" """When loaded history exceeds the model's context threshold, compress before API call."""
agent.compression_enabled = True agent.compression_enabled = True
# Set a very small context so the history is "oversized" # Set a small context so the history is "oversized", but large enough
agent.context_compressor.context_length = 100 # that the compressed result (2 short messages) fits in a single pass.
agent.context_compressor.threshold_tokens = 85 # 85% of 100 agent.context_compressor.context_length = 2000
agent.context_compressor.threshold_tokens = 200
# Build a history that will be large enough to trigger preflight # Build a history that will be large enough to trigger preflight
# (each message ~20 chars = ~5 tokens, 20 messages = ~100 tokens > 85 threshold) # (each message ~50 chars ≈ 13 tokens, 40 messages ≈ 520 tokens > 200 threshold)
big_history = [] big_history = []
for i in range(20): for i in range(20):
big_history.append({"role": "user", "content": f"Message number {i} with some extra text padding"}) big_history.append({"role": "user", "content": f"Message number {i} with some extra text padding"})
@@ -338,7 +339,7 @@ class TestPreflightCompression:
patch.object(agent, "_save_trajectory"), patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"), patch.object(agent, "_cleanup_task_resources"),
): ):
# Simulate compression reducing messages # Simulate compression reducing messages to a small set that fits
mock_compress.return_value = ( mock_compress.return_value = (
[ [
{"role": "user", "content": f"{SUMMARY_PREFIX}\nPrevious conversation"}, {"role": "user", "content": f"{SUMMARY_PREFIX}\nPrevious conversation"},
@@ -411,7 +412,7 @@ class TestToolResultPreflightCompression:
"""When tool results push estimated tokens past threshold, compress before next call.""" """When tool results push estimated tokens past threshold, compress before next call."""
agent.compression_enabled = True agent.compression_enabled = True
agent.context_compressor.context_length = 200_000 agent.context_compressor.context_length = 200_000
agent.context_compressor.threshold_tokens = 140_000 agent.context_compressor.threshold_tokens = 130_000 # below the 135k reported usage
agent.context_compressor.last_prompt_tokens = 130_000 agent.context_compressor.last_prompt_tokens = 130_000
agent.context_compressor.last_completion_tokens = 5_000 agent.context_compressor.last_completion_tokens = 5_000

View File

@@ -28,7 +28,7 @@ from unittest.mock import patch
import pytest import pytest
pytestmark = pytest.mark.skip(reason="Live API integration test — hangs in batch runs") # pytestmark removed — tests skip gracefully via OPENROUTER_API_KEY check on line 59
# Ensure repo root is importable # Ensure repo root is importable
_repo_root = Path(__file__).resolve().parent.parent _repo_root = Path(__file__).resolve().parent.parent

View File

@@ -13,7 +13,7 @@ Run with: python -m pytest tests/test_code_execution.py -v
""" """
import pytest import pytest
pytestmark = pytest.mark.skip(reason="Hangs in non-interactive environments") # pytestmark removed — tests run fine (61 pass, ~99s)
import json import json

View File

@@ -9,7 +9,7 @@ asserts zero contamination from shell noise via _assert_clean().
""" """
import pytest import pytest
pytestmark = pytest.mark.skip(reason="Hangs in non-interactive environments")

View File

@@ -61,7 +61,8 @@ class TestProbeMcpServerTools:
async def fake_connect(name, cfg): async def fake_connect(name, cfg):
return mock_server return mock_server
with patch("tools.mcp_tool._load_mcp_config", return_value=config), \ with patch("tools.mcp_tool._MCP_AVAILABLE", True), \
patch("tools.mcp_tool._load_mcp_config", return_value=config), \
patch("tools.mcp_tool._connect_server", side_effect=fake_connect), \ patch("tools.mcp_tool._connect_server", side_effect=fake_connect), \
patch("tools.mcp_tool._ensure_mcp_loop"), \ patch("tools.mcp_tool._ensure_mcp_loop"), \
patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \ patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \
@@ -102,7 +103,8 @@ class TestProbeMcpServerTools:
raise ConnectionError("Server not found") raise ConnectionError("Server not found")
return mock_server return mock_server
with patch("tools.mcp_tool._load_mcp_config", return_value=config), \ with patch("tools.mcp_tool._MCP_AVAILABLE", True), \
patch("tools.mcp_tool._load_mcp_config", return_value=config), \
patch("tools.mcp_tool._connect_server", side_effect=fake_connect), \ patch("tools.mcp_tool._connect_server", side_effect=fake_connect), \
patch("tools.mcp_tool._ensure_mcp_loop"), \ patch("tools.mcp_tool._ensure_mcp_loop"), \
patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \ patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \
@@ -135,7 +137,8 @@ class TestProbeMcpServerTools:
async def fake_connect(name, cfg): async def fake_connect(name, cfg):
return mock_server return mock_server
with patch("tools.mcp_tool._load_mcp_config", return_value=config), \ with patch("tools.mcp_tool._MCP_AVAILABLE", True), \
patch("tools.mcp_tool._load_mcp_config", return_value=config), \
patch("tools.mcp_tool._connect_server", side_effect=fake_connect), \ patch("tools.mcp_tool._connect_server", side_effect=fake_connect), \
patch("tools.mcp_tool._ensure_mcp_loop"), \ patch("tools.mcp_tool._ensure_mcp_loop"), \
patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \ patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \
@@ -159,7 +162,8 @@ class TestProbeMcpServerTools:
"""_stop_mcp_loop is called even when probe fails.""" """_stop_mcp_loop is called even when probe fails."""
config = {"github": {"command": "npx", "connect_timeout": 5}} config = {"github": {"command": "npx", "connect_timeout": 5}}
with patch("tools.mcp_tool._load_mcp_config", return_value=config), \ with patch("tools.mcp_tool._MCP_AVAILABLE", True), \
patch("tools.mcp_tool._load_mcp_config", return_value=config), \
patch("tools.mcp_tool._ensure_mcp_loop"), \ patch("tools.mcp_tool._ensure_mcp_loop"), \
patch("tools.mcp_tool._run_on_mcp_loop", side_effect=RuntimeError("boom")), \ patch("tools.mcp_tool._run_on_mcp_loop", side_effect=RuntimeError("boom")), \
patch("tools.mcp_tool._stop_mcp_loop") as mock_stop: patch("tools.mcp_tool._stop_mcp_loop") as mock_stop:
@@ -187,7 +191,8 @@ class TestProbeMcpServerTools:
connect_calls.append(name) connect_calls.append(name)
return mock_server return mock_server
with patch("tools.mcp_tool._load_mcp_config", return_value=config), \ with patch("tools.mcp_tool._MCP_AVAILABLE", True), \
patch("tools.mcp_tool._load_mcp_config", return_value=config), \
patch("tools.mcp_tool._connect_server", side_effect=fake_connect), \ patch("tools.mcp_tool._connect_server", side_effect=fake_connect), \
patch("tools.mcp_tool._ensure_mcp_loop"), \ patch("tools.mcp_tool._ensure_mcp_loop"), \
patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \ patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \

View File

@@ -1,11 +1,22 @@
import asyncio import asyncio
import os import os
import sys
from types import SimpleNamespace from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch from unittest.mock import AsyncMock, MagicMock, patch
import pytest import pytest
from tools.mcp_tool import MCPServerTask, _format_connect_error, _resolve_stdio_command from tools.mcp_tool import MCPServerTask, _format_connect_error, _resolve_stdio_command, _MCP_AVAILABLE
# Ensure the mcp module symbols exist for patching even when the SDK isn't installed
if not _MCP_AVAILABLE:
import tools.mcp_tool as _mcp_mod
if not hasattr(_mcp_mod, "StdioServerParameters"):
_mcp_mod.StdioServerParameters = MagicMock
if not hasattr(_mcp_mod, "stdio_client"):
_mcp_mod.stdio_client = MagicMock
if not hasattr(_mcp_mod, "ClientSession"):
_mcp_mod.ClientSession = MagicMock
def test_resolve_stdio_command_falls_back_to_hermes_node_bin(tmp_path): def test_resolve_stdio_command_falls_back_to_hermes_node_bin(tmp_path):

View File

@@ -898,7 +898,7 @@ class ShellFileOperations(FileOperations):
hidden_exclude = "-not -path '*/.*'" hidden_exclude = "-not -path '*/.*'"
cmd = f"find {self._escape_shell_arg(path)} {hidden_exclude} -type f -name {self._escape_shell_arg(search_pattern)} " \ cmd = f"find {self._escape_shell_arg(path)} {hidden_exclude} -type f -name {self._escape_shell_arg(search_pattern)} " \
f"-printf '%T@ %p\\\\n' 2>/dev/null | sort -rn | tail -n +{offset + 1} | head -n {limit}" f"-printf '%T@ %p\\n' 2>/dev/null | sort -rn | tail -n +{offset + 1} | head -n {limit}"
result = self._exec(cmd, timeout=60) result = self._exec(cmd, timeout=60)