mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
fix(aux): add session_search extra_body and concurrency controls
Adds auxiliary.<task>.extra_body config passthrough so reasoning-heavy OpenAI-compatible providers can receive provider-specific request fields (e.g. enable_thinking: false on GLM) on auxiliary calls, and bounds session_search summary fan-out with auxiliary.session_search.max_concurrency (default 3, clamped 1-5) to avoid 429 bursts on small providers.

- agent/auxiliary_client.py: extract _get_auxiliary_task_config helper, add _get_task_extra_body, merge config+explicit extra_body with explicit winning
- hermes_cli/config.py: extra_body defaults on all aux tasks + session_search.max_concurrency; _config_version 19 -> 20
- tools/session_search_tool.py: semaphore around _summarize_all gather
- tests: coverage in test_auxiliary_client, test_session_search, test_aux_config
- docs: user-guide/configuration.md + fallback-providers.md

Co-authored-by: Teknium <teknium@nousresearch.com>
This commit is contained in:
@@ -27,6 +27,27 @@ MAX_SESSION_CHARS = 100_000
|
||||
MAX_SUMMARY_TOKENS = 10000
|
||||
|
||||
|
||||
def _get_session_search_max_concurrency(default: int = 3) -> int:
    """Read ``auxiliary.session_search.max_concurrency`` with sane bounds.

    The setting is looked up in the mapping returned by
    ``hermes_cli.config.load_config()``.

    Args:
        default: Value returned when the setting cannot be read or parsed.
            It is returned as-is, without clamping.

    Returns:
        The configured value coerced with ``int()`` and clamped to the
        range 1-5. ``default`` is returned instead when the config module
        cannot be imported, when any level of the config is not a dict,
        when the key is missing or ``None``, or when the value cannot be
        converted to an integer.
    """
    try:
        from hermes_cli.config import load_config

        config = load_config()
    except ImportError:
        return default

    aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
    task_config = aux.get("session_search", {}) if isinstance(aux, dict) else {}
    if not isinstance(task_config, dict):
        return default

    raw = task_config.get("max_concurrency")
    if raw is None:
        return default

    try:
        value = int(raw)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers int(float("inf")); treat an infinite value
        # like any other unparseable setting instead of crashing.
        return default

    return max(1, min(value, 5))
|
||||
|
||||
|
||||
def _format_timestamp(ts: Union[int, float, str, None]) -> str:
|
||||
"""Convert a Unix timestamp (float/int) or ISO string to a human-readable date.
|
||||
|
||||
@@ -423,9 +444,16 @@ def session_search(
|
||||
|
||||
# Summarize all sessions in parallel
|
||||
async def _summarize_all() -> List[Union[str, Exception]]:
    """Summarize all sessions with bounded concurrency.

    Each entry in ``tasks`` is summarized through ``_summarize_session``,
    but a semaphore limits how many of those calls run at the same time.
    The limit is the configured session-search concurrency, capped at the
    number of tasks and never below 1.

    Returns:
        The ``asyncio.gather`` results, one per task. Because
        ``return_exceptions=True`` is passed, a summary that raises is
        reported as its exception object instead of propagating.
    """
    max_concurrency = min(_get_session_search_max_concurrency(), max(1, len(tasks)))
    semaphore = asyncio.Semaphore(max_concurrency)

    async def _bounded_summary(text: str, meta: Dict[str, Any]) -> Optional[str]:
        # Hold a semaphore slot for the whole duration of one summary call.
        async with semaphore:
            return await _summarize_session(text, query, meta)

    coros = [_bounded_summary(text, meta) for _, _, text, meta in tasks]
    return await asyncio.gather(*coros, return_exceptions=True)
|
||||
|
||||
Reference in New Issue
Block a user