mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
refactor: tighten MoA traceback logging scope (#1307)
* improve: add exc_info to MoA error logging

* refactor: tighten MoA traceback logging scope

Follow up on salvaged PR #998 by limiting exc_info logging to terminal failure paths, avoiding duplicate aggregator errors, and refreshing the MoA default OpenRouter model lineup to current frontier options.

---------

Co-authored-by: aydnOktay <xaydinoktay@gmail.com>
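In practical terms, the retry/terminal split described above keeps retry warnings traceback-free and reserves exc_info=True for the single terminal failure log. A minimal sketch of that pattern, assuming nothing beyond the standard library (the helper name call_with_retries and its signature are illustrative, not the tool's actual API):

    import asyncio
    import logging

    logger = logging.getLogger(__name__)


    async def call_with_retries(run_model, model, prompt, max_retries=3):
        """Illustrative sketch: concise warnings on retries, one traceback at terminal failure."""
        for attempt in range(max_retries):
            try:
                return model, await run_model(model, prompt), True
            except Exception as exc:
                if attempt < max_retries - 1:
                    # Retry path: no exc_info, so repeated transient errors stay concise.
                    logger.warning("%s error (attempt %s): %s", model, attempt + 1, exc)
                    await asyncio.sleep(min(2 ** (attempt + 1), 60))
                else:
                    # Terminal path: log the full traceback exactly once.
                    error_msg = f"{model} failed after {max_retries} attempts: {exc}"
                    logger.error("%s", error_msg, exc_info=True)
                    return model, error_msg, False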
tests/tools/test_mixture_of_agents_tool.py (new file, 82 lines)
@@ -0,0 +1,82 @@
import importlib
import json
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock

import pytest

moa = importlib.import_module("tools.mixture_of_agents_tool")


def test_moa_defaults_track_current_openrouter_frontier_models():
    assert moa.REFERENCE_MODELS == [
        "anthropic/claude-opus-4.6",
        "google/gemini-3-pro-preview",
        "openai/gpt-5.4-pro",
        "deepseek/deepseek-v3.2",
    ]
    assert moa.AGGREGATOR_MODEL == "anthropic/claude-opus-4.6"


@pytest.mark.asyncio
async def test_reference_model_retry_warnings_avoid_exc_info_until_terminal_failure(monkeypatch):
    fake_client = SimpleNamespace(
        chat=SimpleNamespace(
            completions=SimpleNamespace(
                create=AsyncMock(side_effect=RuntimeError("rate limited"))
            )
        )
    )
    warn = MagicMock()
    err = MagicMock()

    monkeypatch.setattr(moa, "_get_openrouter_client", lambda: fake_client)
    monkeypatch.setattr(moa.logger, "warning", warn)
    monkeypatch.setattr(moa.logger, "error", err)

    model, message, success = await moa._run_reference_model_safe(
        "openai/gpt-5.4-pro", "hello", max_retries=2
    )

    assert model == "openai/gpt-5.4-pro"
    assert success is False
    assert "failed after 2 attempts" in message
    assert warn.call_count == 2
    assert all(call.kwargs.get("exc_info") is None for call in warn.call_args_list)
    err.assert_called_once()
    assert err.call_args.kwargs.get("exc_info") is True


@pytest.mark.asyncio
async def test_moa_top_level_error_logs_single_traceback_on_aggregator_failure(monkeypatch):
    monkeypatch.setenv("OPENROUTER_API_KEY", "test-key")
    monkeypatch.setattr(
        moa,
        "_run_reference_model_safe",
        AsyncMock(return_value=("anthropic/claude-opus-4.6", "ok", True)),
    )
    monkeypatch.setattr(
        moa,
        "_run_aggregator_model",
        AsyncMock(side_effect=RuntimeError("aggregator boom")),
    )
    monkeypatch.setattr(
        moa,
        "_debug",
        SimpleNamespace(log_call=MagicMock(), save=MagicMock(), active=False),
    )

    err = MagicMock()
    monkeypatch.setattr(moa.logger, "error", err)

    result = json.loads(
        await moa.mixture_of_agents_tool(
            "solve this",
            reference_models=["anthropic/claude-opus-4.6"],
        )
    )

    assert result["success"] is False
    assert "Error in MoA processing" in result["error"]
    err.assert_called_once()
    assert err.call_args.kwargs.get("exc_info") is True
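Assuming pytest and pytest-asyncio are installed (the @pytest.mark.asyncio markers above imply the latter), the new tests can be run on their own with something like the following; the invocation is illustrative, not taken from the repository's tooling:

    import pytest

    # Illustrative: run only the new MoA logging tests, verbosely.
    raise SystemExit(pytest.main(["-v", "tests/tools/test_mixture_of_agents_tool.py"]))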
@@ -25,8 +25,8 @@ Architecture:
 3. Multiple layers can be used for iterative refinement (future enhancement)
 
 Models Used (via OpenRouter):
-- Reference Models: claude-opus-4, gemini-2.5-pro, gpt-4.1, deepseek-r1
-- Aggregator Model: claude-opus-4 (highest capability for synthesis)
+- Reference Models: claude-opus-4.6, gemini-3-pro-preview, gpt-5.4-pro, deepseek-v3.2
+- Aggregator Model: claude-opus-4.6 (highest capability for synthesis)
 
 Configuration:
 To customize the MoA setup, modify the configuration constants at the top of this file:
@@ -57,16 +57,18 @@ from tools.debug_helpers import DebugSession
 logger = logging.getLogger(__name__)
 
 # Configuration for MoA processing
-# Reference models - these generate diverse initial responses in parallel (OpenRouter slugs)
+# Reference models - these generate diverse initial responses in parallel.
+# Keep this list aligned with current top-tier OpenRouter frontier options.
 REFERENCE_MODELS = [
-    "anthropic/claude-opus-4.5",
-    "google/gemini-3-pro-preview",
-    "openai/gpt-5.2-pro",
-    "deepseek/deepseek-v3.2"
+    "anthropic/claude-opus-4.6",
+    "google/gemini-3-pro-preview",
+    "openai/gpt-5.4-pro",
+    "deepseek/deepseek-v3.2",
 ]
 
-# Aggregator model - synthesizes reference responses into final output
-AGGREGATOR_MODEL = "anthropic/claude-opus-4.5"  # Use highest capability model for aggregation
+# Aggregator model - synthesizes reference responses into final output.
+# Prefer the strongest synthesis model in the current OpenRouter lineup.
+AGGREGATOR_MODEL = "anthropic/claude-opus-4.6"
 
 # Temperature settings optimized for MoA performance
 REFERENCE_TEMPERATURE = 0.6  # Balanced creativity for diverse perspectives
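As the module docstring notes, customizing the MoA setup comes down to editing these constants, or overriding them at runtime the way the tests above do with monkeypatch. A hedged example; the trimmed model list is illustrative only:

    import tools.mixture_of_agents_tool as moa

    # Illustrative override for a smaller, cheaper MoA run; slugs are examples, not recommendations.
    moa.REFERENCE_MODELS = [
        "anthropic/claude-opus-4.6",
        "openai/gpt-5.4-pro",
    ]
    moa.AGGREGATOR_MODEL = "anthropic/claude-opus-4.6"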
@@ -147,14 +149,15 @@ async def _run_reference_model_safe(
 
         except Exception as e:
             error_str = str(e)
-            # Log more detailed error information for debugging
+            # Keep retry-path logging concise; full tracebacks are reserved for
+            # terminal failure paths so long-running MoA retries don't flood logs.
             if "invalid" in error_str.lower():
                 logger.warning("%s invalid request error (attempt %s): %s", model, attempt + 1, error_str)
             elif "rate" in error_str.lower() or "limit" in error_str.lower():
                 logger.warning("%s rate limit error (attempt %s): %s", model, attempt + 1, error_str)
             else:
                 logger.warning("%s unknown error (attempt %s): %s", model, attempt + 1, error_str)
 
             if attempt < max_retries - 1:
                 # Exponential backoff for rate limiting: 2s, 4s, 8s, 16s, 32s, 60s
                 sleep_time = min(2 ** (attempt + 1), 60)
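For reference, the backoff schedule in the comment above follows directly from min(2 ** (attempt + 1), 60); a quick check of the first six attempts:

    # Attempts 0..5 map to 2, 4, 8, 16, 32, 60 seconds (2 ** 6 = 64 is capped at 60).
    print([min(2 ** (attempt + 1), 60) for attempt in range(6)])  # [2, 4, 8, 16, 32, 60]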
@@ -162,7 +165,7 @@ async def _run_reference_model_safe(
                 await asyncio.sleep(sleep_time)
             else:
                 error_msg = f"{model} failed after {max_retries} attempts: {error_str}"
-                logger.error("%s", error_msg)
+                logger.error("%s", error_msg, exc_info=True)
                 return model, error_msg, False
 
 
@@ -185,7 +188,7 @@ async def _run_aggregator_model(
         str: Synthesized final response
     """
     logger.info("Running aggregator model: %s", AGGREGATOR_MODEL)
 
     # Build parameters for the API call
     api_params = {
         "model": AGGREGATOR_MODEL,
@@ -200,14 +203,14 @@ async def _run_aggregator_model(
             }
         }
     }
 
     # GPT models (especially gpt-4o-mini) don't support custom temperature values
     # Only include temperature for non-GPT models
     if not AGGREGATOR_MODEL.lower().startswith('gpt-'):
         api_params["temperature"] = temperature
 
     response = await _get_openrouter_client().chat.completions.create(**api_params)
 
     content = response.choices[0].message.content.strip()
     logger.info("Aggregation complete (%s characters)", len(content))
     return content
@@ -364,7 +367,7 @@ async def mixture_of_agents_tool(
 
     except Exception as e:
         error_msg = f"Error in MoA processing: {str(e)}"
-        logger.error("%s", error_msg)
+        logger.error("%s", error_msg, exc_info=True)
 
         # Calculate processing time even for errors
         end_time = datetime.datetime.now()