fix(aux): add session_search extra_body and concurrency controls

Adds auxiliary.<task>.extra_body config passthrough so reasoning-heavy
OpenAI-compatible providers can receive provider-specific request fields
(e.g. enable_thinking: false on GLM) on auxiliary calls, and bounds
session_search summary fan-out with auxiliary.session_search.max_concurrency
(default 3, clamped 1-5) to avoid 429 bursts on small providers.

- agent/auxiliary_client.py: extract _get_auxiliary_task_config helper,
  add _get_task_extra_body, merge config+explicit extra_body with explicit winning
- hermes_cli/config.py: extra_body defaults on all aux tasks +
  session_search.max_concurrency; _config_version 19 -> 20
- tools/session_search_tool.py: semaphore around _summarize_all gather
- tests: coverage in test_auxiliary_client, test_session_search, test_aux_config
- docs: user-guide/configuration.md + fallback-providers.md

Co-authored-by: Teknium <teknium@nousresearch.com>
This commit is contained in:
helix4u
2026-04-20 00:44:32 -07:00
committed by Teknium
parent 904f20d622
commit 6ab78401c9
8 changed files with 207 additions and 26 deletions

View File

@@ -27,6 +27,27 @@ MAX_SESSION_CHARS = 100_000
MAX_SUMMARY_TOKENS = 10000
def _get_session_search_max_concurrency(default: int = 3) -> int:
"""Read auxiliary.session_search.max_concurrency with sane bounds."""
try:
from hermes_cli.config import load_config
config = load_config()
except ImportError:
return default
aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
task_config = aux.get("session_search", {}) if isinstance(aux, dict) else {}
if not isinstance(task_config, dict):
return default
raw = task_config.get("max_concurrency")
if raw is None:
return default
try:
value = int(raw)
except (TypeError, ValueError):
return default
return max(1, min(value, 5))
def _format_timestamp(ts: Union[int, float, str, None]) -> str:
"""Convert a Unix timestamp (float/int) or ISO string to a human-readable date.
@@ -423,9 +444,16 @@ def session_search(
# Summarize all sessions concurrently, capped by auxiliary.session_search.max_concurrency
async def _summarize_all() -> List[Union[str, Exception]]:
    """Summarize all sessions with bounded concurrency.

    Builds one summary coroutine per entry in the enclosing ``tasks`` list
    and awaits them together, allowing at most ``max_concurrency`` of them
    past the semaphore at any one time.

    Returns:
        One result per task, in task order. Because ``gather`` is called
        with ``return_exceptions=True``, a failed summary appears as the
        raised exception object instead of propagating.
    """
    # Cap at the number of tasks, but never below 1 so the semaphore stays
    # usable even when ``tasks`` is empty.
    max_concurrency = min(_get_session_search_max_concurrency(), max(1, len(tasks)))
    semaphore = asyncio.Semaphore(max_concurrency)

    async def _bounded_summary(text: str, meta: Dict[str, Any]) -> Optional[str]:
        """Run a single session summary while holding a semaphore slot."""
        async with semaphore:
            return await _summarize_session(text, query, meta)

    coros = [
        _bounded_summary(text, meta)
        for _, _, text, meta in tasks
    ]
    return await asyncio.gather(*coros, return_exceptions=True)