refactor: tighten MoA traceback logging scope (#1307)

* improve: add exc_info to MoA error logging

* refactor: tighten MoA traceback logging scope

Follow up on salvaged PR #998 by limiting exc_info logging to terminal
failure paths, avoiding duplicate aggregator errors, and refreshing the
MoA default OpenRouter model lineup to current frontier options.

---------

Co-authored-by: aydnOktay <xaydinoktay@gmail.com>
This commit is contained in:
Teknium
2026-03-14 07:53:56 -07:00
committed by GitHub
parent d1a1a09a70
commit 707f3ff41f
2 changed files with 102 additions and 17 deletions

View File

@@ -0,0 +1,82 @@
import importlib
import json
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock
import pytest
moa = importlib.import_module("tools.mixture_of_agents_tool")
def test_moa_defaults_track_current_openrouter_frontier_models():
    """Pin the default MoA model lineup to the current OpenRouter frontier set."""
    expected_reference_lineup = [
        "anthropic/claude-opus-4.6",
        "google/gemini-3-pro-preview",
        "openai/gpt-5.4-pro",
        "deepseek/deepseek-v3.2",
    ]
    assert moa.REFERENCE_MODELS == expected_reference_lineup
    assert moa.AGGREGATOR_MODEL == "anthropic/claude-opus-4.6"
@pytest.mark.asyncio
async def test_reference_model_retry_warnings_avoid_exc_info_until_terminal_failure(monkeypatch):
    """Retry-path warnings stay concise; only the terminal failure logs a traceback."""
    # Client whose completion call always raises, so every attempt fails.
    failing_create = AsyncMock(side_effect=RuntimeError("rate limited"))
    stub_client = SimpleNamespace(
        chat=SimpleNamespace(completions=SimpleNamespace(create=failing_create))
    )
    warning_spy = MagicMock()
    error_spy = MagicMock()
    monkeypatch.setattr(moa, "_get_openrouter_client", lambda: stub_client)
    monkeypatch.setattr(moa.logger, "warning", warning_spy)
    monkeypatch.setattr(moa.logger, "error", error_spy)

    model, message, success = await moa._run_reference_model_safe(
        "openai/gpt-5.4-pro", "hello", max_retries=2
    )

    assert model == "openai/gpt-5.4-pro"
    assert success is False
    assert "failed after 2 attempts" in message
    # One warning per attempt, none carrying a traceback.
    assert warning_spy.call_count == 2
    for warning_call in warning_spy.call_args_list:
        assert warning_call.kwargs.get("exc_info") is None
    # Exactly one terminal error, and it does carry the traceback.
    error_spy.assert_called_once()
    assert error_spy.call_args.kwargs.get("exc_info") is True
@pytest.mark.asyncio
async def test_moa_top_level_error_logs_single_traceback_on_aggregator_failure(monkeypatch):
    """An aggregator crash produces exactly one logger.error call with exc_info=True."""
    monkeypatch.setenv("OPENROUTER_API_KEY", "test-key")
    # Reference stage succeeds; the aggregator stage is the sole failure point.
    monkeypatch.setattr(
        moa,
        "_run_reference_model_safe",
        AsyncMock(return_value=("anthropic/claude-opus-4.6", "ok", True)),
    )
    monkeypatch.setattr(
        moa,
        "_run_aggregator_model",
        AsyncMock(side_effect=RuntimeError("aggregator boom")),
    )
    # Neutralize the debug session so no files are written during the test.
    monkeypatch.setattr(
        moa,
        "_debug",
        SimpleNamespace(log_call=MagicMock(), save=MagicMock(), active=False),
    )
    error_spy = MagicMock()
    monkeypatch.setattr(moa.logger, "error", error_spy)

    raw_result = await moa.mixture_of_agents_tool(
        "solve this",
        reference_models=["anthropic/claude-opus-4.6"],
    )
    payload = json.loads(raw_result)

    assert payload["success"] is False
    assert "Error in MoA processing" in payload["error"]
    error_spy.assert_called_once()
    assert error_spy.call_args.kwargs.get("exc_info") is True

View File

@@ -25,8 +25,8 @@ Architecture:
3. Multiple layers can be used for iterative refinement (future enhancement)
Models Used (via OpenRouter):
- Reference Models: claude-opus-4, gemini-2.5-pro, gpt-4.1, deepseek-r1
- Aggregator Model: claude-opus-4 (highest capability for synthesis)
- Reference Models: claude-opus-4.6, gemini-3-pro-preview, gpt-5.4-pro, deepseek-v3.2
- Aggregator Model: claude-opus-4.6 (highest capability for synthesis)
Configuration:
To customize the MoA setup, modify the configuration constants at the top of this file:
@@ -57,16 +57,18 @@ from tools.debug_helpers import DebugSession
logger = logging.getLogger(__name__)
# Configuration for MoA processing
# Reference models - these generate diverse initial responses in parallel (OpenRouter slugs)
# Reference models - these generate diverse initial responses in parallel.
# Keep this list aligned with current top-tier OpenRouter frontier options.
REFERENCE_MODELS = [
"anthropic/claude-opus-4.5",
"google/gemini-3-pro-preview",
"openai/gpt-5.2-pro",
"deepseek/deepseek-v3.2"
"anthropic/claude-opus-4.6",
"google/gemini-3-pro-preview",
"openai/gpt-5.4-pro",
"deepseek/deepseek-v3.2",
]
# Aggregator model - synthesizes reference responses into final output
AGGREGATOR_MODEL = "anthropic/claude-opus-4.5" # Use highest capability model for aggregation
# Aggregator model - synthesizes reference responses into final output.
# Prefer the strongest synthesis model in the current OpenRouter lineup.
AGGREGATOR_MODEL = "anthropic/claude-opus-4.6"
# Temperature settings optimized for MoA performance
REFERENCE_TEMPERATURE = 0.6 # Balanced creativity for diverse perspectives
@@ -147,14 +149,15 @@ async def _run_reference_model_safe(
except Exception as e:
error_str = str(e)
# Log more detailed error information for debugging
# Keep retry-path logging concise; full tracebacks are reserved for
# terminal failure paths so long-running MoA retries don't flood logs.
if "invalid" in error_str.lower():
logger.warning("%s invalid request error (attempt %s): %s", model, attempt + 1, error_str)
elif "rate" in error_str.lower() or "limit" in error_str.lower():
logger.warning("%s rate limit error (attempt %s): %s", model, attempt + 1, error_str)
else:
logger.warning("%s unknown error (attempt %s): %s", model, attempt + 1, error_str)
if attempt < max_retries - 1:
# Exponential backoff for rate limiting: 2s, 4s, 8s, 16s, 32s, 60s
sleep_time = min(2 ** (attempt + 1), 60)
@@ -162,7 +165,7 @@ async def _run_reference_model_safe(
await asyncio.sleep(sleep_time)
else:
error_msg = f"{model} failed after {max_retries} attempts: {error_str}"
logger.error("%s", error_msg)
logger.error("%s", error_msg, exc_info=True)
return model, error_msg, False
@@ -185,7 +188,7 @@ async def _run_aggregator_model(
str: Synthesized final response
"""
logger.info("Running aggregator model: %s", AGGREGATOR_MODEL)
# Build parameters for the API call
api_params = {
"model": AGGREGATOR_MODEL,
@@ -200,14 +203,14 @@ async def _run_aggregator_model(
}
}
}
# GPT models historically rejected custom temperature values, so it is omitted for them.
# NOTE(review): the stale gpt-4o-mini example is gone from the lineup, and the
# startswith('gpt-') check can never match OpenRouter slugs ('openai/gpt-...') — verify intent.
if not AGGREGATOR_MODEL.lower().startswith('gpt-'):
api_params["temperature"] = temperature
response = await _get_openrouter_client().chat.completions.create(**api_params)
content = response.choices[0].message.content.strip()
logger.info("Aggregation complete (%s characters)", len(content))
return content
@@ -364,7 +367,7 @@ async def mixture_of_agents_tool(
except Exception as e:
error_msg = f"Error in MoA processing: {str(e)}"
logger.error("%s", error_msg)
logger.error("%s", error_msg, exc_info=True)
# Calculate processing time even for errors
end_time = datetime.datetime.now()