Mirror of https://github.com/NousResearch/hermes-agent.git (last synced 2026-04-28 06:51:16 +08:00)
Salvage of #3389 by @binhnt92, with reasoning fallback and retry logic added on top. All 7 auxiliary LLM call sites now go through extract_content_or_reasoning() and mirror the main agent loop's behavior: extract the content, strip think blocks, fall back to structured reasoning fields, and retry when the result is still empty. Closes #3389.
This commit is contained in:
@@ -52,6 +52,7 @@ import asyncio
|
||||
import datetime
|
||||
from typing import Dict, Any, List, Optional
|
||||
from tools.openrouter_client import get_async_client as _get_openrouter_client, check_api_key as check_openrouter_api_key
|
||||
from agent.auxiliary_client import extract_content_or_reasoning
|
||||
from tools.debug_helpers import DebugSession
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -143,7 +144,13 @@ async def _run_reference_model_safe(
|
||||
|
||||
response = await _get_openrouter_client().chat.completions.create(**api_params)
|
||||
|
||||
content = response.choices[0].message.content.strip()
|
||||
content = extract_content_or_reasoning(response)
|
||||
if not content:
|
||||
# Reasoning-only response — let the retry loop handle it
|
||||
logger.warning("%s returned empty content (attempt %s/%s), retrying", model, attempt + 1, max_retries)
|
||||
if attempt < max_retries - 1:
|
||||
await asyncio.sleep(min(2 ** (attempt + 1), 60))
|
||||
continue
|
||||
logger.info("%s responded (%s characters)", model, len(content))
|
||||
return model, content, True
|
||||
|
||||
@@ -211,7 +218,14 @@ async def _run_aggregator_model(
|
||||
|
||||
response = await _get_openrouter_client().chat.completions.create(**api_params)
|
||||
|
||||
content = response.choices[0].message.content.strip()
|
||||
content = extract_content_or_reasoning(response)
|
||||
|
||||
# Retry once on empty content (reasoning-only response)
|
||||
if not content:
|
||||
logger.warning("Aggregator returned empty content, retrying once")
|
||||
response = await _get_openrouter_client().chat.completions.create(**api_params)
|
||||
content = extract_content_or_reasoning(response)
|
||||
|
||||
logger.info("Aggregation complete (%s characters)", len(content))
|
||||
return content
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ import json
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional, Union
|
||||
|
||||
from agent.auxiliary_client import async_call_llm
|
||||
from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning
|
||||
MAX_SESSION_CHARS = 100_000
|
||||
MAX_SUMMARY_TOKENS = 10000
|
||||
|
||||
@@ -161,7 +161,15 @@ async def _summarize_session(
|
||||
temperature=0.1,
|
||||
max_tokens=MAX_SUMMARY_TOKENS,
|
||||
)
|
||||
return response.choices[0].message.content.strip()
|
||||
content = extract_content_or_reasoning(response)
|
||||
if content:
|
||||
return content
|
||||
# Reasoning-only / empty — let the retry loop handle it
|
||||
logging.warning("Session search LLM returned empty content (attempt %d/%d)", attempt + 1, max_retries)
|
||||
if attempt < max_retries - 1:
|
||||
await asyncio.sleep(1 * (attempt + 1))
|
||||
continue
|
||||
return content
|
||||
except RuntimeError:
|
||||
logging.warning("No auxiliary model available for session summarization")
|
||||
return None
|
||||
|
||||
@@ -948,9 +948,9 @@ def llm_audit_skill(skill_path: Path, static_result: ScanResult,
|
||||
|
||||
# Call the LLM via the centralized provider router
|
||||
try:
|
||||
from agent.auxiliary_client import call_llm
|
||||
from agent.auxiliary_client import call_llm, extract_content_or_reasoning
|
||||
|
||||
response = call_llm(
|
||||
call_kwargs = dict(
|
||||
provider="openrouter",
|
||||
model=model,
|
||||
messages=[{
|
||||
@@ -960,7 +960,13 @@ def llm_audit_skill(skill_path: Path, static_result: ScanResult,
|
||||
temperature=0,
|
||||
max_tokens=1000,
|
||||
)
|
||||
llm_text = response.choices[0].message.content.strip()
|
||||
response = call_llm(**call_kwargs)
|
||||
llm_text = extract_content_or_reasoning(response)
|
||||
|
||||
# Retry once on empty content (reasoning-only response)
|
||||
if not llm_text:
|
||||
response = call_llm(**call_kwargs)
|
||||
llm_text = extract_content_or_reasoning(response)
|
||||
except Exception:
|
||||
# LLM audit is best-effort — don't block install if the call fails
|
||||
return static_result
|
||||
|
||||
@@ -37,7 +37,7 @@ from pathlib import Path
|
||||
from typing import Any, Awaitable, Dict, Optional
|
||||
from urllib.parse import urlparse
|
||||
import httpx
|
||||
from agent.auxiliary_client import async_call_llm
|
||||
from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning
|
||||
from tools.debug_helpers import DebugSession
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -346,8 +346,15 @@ async def vision_analyze_tool(
|
||||
call_kwargs["model"] = model
|
||||
response = await async_call_llm(**call_kwargs)
|
||||
|
||||
# Extract the analysis
|
||||
analysis = response.choices[0].message.content.strip()
|
||||
# Extract the analysis — fall back to reasoning if content is empty
|
||||
analysis = extract_content_or_reasoning(response)
|
||||
|
||||
# Retry once on empty content (reasoning-only response)
|
||||
if not analysis:
|
||||
logger.warning("Vision LLM returned empty content, retrying once")
|
||||
response = await async_call_llm(**call_kwargs)
|
||||
analysis = extract_content_or_reasoning(response)
|
||||
|
||||
analysis_length = len(analysis)
|
||||
|
||||
logger.info("Image analysis completed (%s characters)", analysis_length)
|
||||
|
||||
@@ -44,7 +44,7 @@ import asyncio
|
||||
from typing import List, Dict, Any, Optional
|
||||
import httpx
|
||||
from firecrawl import Firecrawl
|
||||
from agent.auxiliary_client import async_call_llm
|
||||
from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning
|
||||
from tools.debug_helpers import DebugSession
|
||||
from tools.url_safety import is_safe_url
|
||||
from tools.website_policy import check_website_access
|
||||
@@ -416,7 +416,16 @@ Create a markdown summary that captures all key information in a well-organized,
|
||||
if model:
|
||||
call_kwargs["model"] = model
|
||||
response = await async_call_llm(**call_kwargs)
|
||||
return response.choices[0].message.content.strip()
|
||||
content = extract_content_or_reasoning(response)
|
||||
if content:
|
||||
return content
|
||||
# Reasoning-only / empty response — let the retry loop handle it
|
||||
logger.warning("LLM returned empty content (attempt %d/%d), retrying", attempt + 1, max_retries)
|
||||
if attempt < max_retries - 1:
|
||||
await asyncio.sleep(retry_delay)
|
||||
retry_delay = min(retry_delay * 2, 60)
|
||||
continue
|
||||
return content # Return whatever we got after exhausting retries
|
||||
except RuntimeError:
|
||||
logger.warning("No auxiliary model available for web content processing")
|
||||
return None
|
||||
@@ -535,8 +544,14 @@ Create a single, unified markdown summary."""
|
||||
if model:
|
||||
call_kwargs["model"] = model
|
||||
response = await async_call_llm(**call_kwargs)
|
||||
final_summary = response.choices[0].message.content.strip()
|
||||
|
||||
final_summary = extract_content_or_reasoning(response)
|
||||
|
||||
# Retry once on empty content (reasoning-only response)
|
||||
if not final_summary:
|
||||
logger.warning("Synthesis LLM returned empty content, retrying once")
|
||||
response = await async_call_llm(**call_kwargs)
|
||||
final_summary = extract_content_or_reasoning(response)
|
||||
|
||||
# Enforce hard cap
|
||||
if len(final_summary) > max_output_size:
|
||||
final_summary = final_summary[:max_output_size] + "\n\n[... summary truncated for context management ...]"
|
||||
|
||||
Reference in New Issue
Block a user