mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-16 23:21:32 +08:00
Compare commits
9 Commits
fix/matrix
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e245885f3e | ||
|
|
d4bb44d4b9 | ||
|
|
6693e2a497 | ||
|
|
55fac8a386 | ||
|
|
50bb4fe010 | ||
|
|
06e1d9cdd4 | ||
|
|
69f3aaa1d6 | ||
|
|
c94936839c | ||
|
|
d7607292d9 |
@@ -89,6 +89,15 @@
|
||||
# Optional base URL override:
|
||||
# HERMES_QWEN_BASE_URL=https://portal.qwen.ai/v1
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (Xiaomi MiMo)
|
||||
# =============================================================================
|
||||
# Xiaomi MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash).
|
||||
# Get your key at: https://platform.xiaomimimo.com
|
||||
# XIAOMI_API_KEY=your_key_here
|
||||
# Optional base URL override:
|
||||
# XIAOMI_BASE_URL=https://api.xiaomimimo.com/v1
|
||||
|
||||
# =============================================================================
|
||||
# TOOL API KEYS
|
||||
# =============================================================================
|
||||
|
||||
@@ -23,17 +23,13 @@ Resolution order for vision/multimodal tasks (auto mode):
|
||||
6. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
|
||||
7. None
|
||||
|
||||
Per-task provider overrides (e.g. AUXILIARY_VISION_PROVIDER,
|
||||
CONTEXT_COMPRESSION_PROVIDER) can force a specific provider for each task.
|
||||
Per-task overrides are configured in config.yaml under the ``auxiliary:`` section
|
||||
(e.g. ``auxiliary.vision.provider``, ``auxiliary.compression.model``).
|
||||
Default "auto" follows the chains above.
|
||||
|
||||
Per-task model overrides (e.g. AUXILIARY_VISION_MODEL,
|
||||
AUXILIARY_WEB_EXTRACT_MODEL) let callers use a different model slug
|
||||
than the provider's default.
|
||||
|
||||
Per-task direct endpoint overrides (e.g. AUXILIARY_VISION_BASE_URL,
|
||||
AUXILIARY_VISION_API_KEY) let callers route a specific auxiliary task to a
|
||||
custom OpenAI-compatible endpoint without touching the main model settings.
|
||||
Legacy env var overrides (AUXILIARY_{TASK}_PROVIDER, AUXILIARY_{TASK}_MODEL,
|
||||
AUXILIARY_{TASK}_BASE_URL, etc.) are still read as a backward-compat fallback
|
||||
but config.yaml takes priority. New configuration should always use config.yaml.
|
||||
|
||||
Payment / credit exhaustion fallback:
|
||||
When a resolved provider returns HTTP 402 or a credit-related error,
|
||||
@@ -111,6 +107,14 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
||||
"kilocode": "google/gemini-3-flash-preview",
|
||||
}
|
||||
|
||||
# Vision-specific model overrides for direct providers.
|
||||
# When the user's main provider has a dedicated vision/multimodal model that
|
||||
# differs from their main chat model, map it here. The vision auto-detect
|
||||
# "exotic provider" branch checks this before falling back to the main model.
|
||||
_PROVIDER_VISION_MODELS: Dict[str, str] = {
|
||||
"xiaomi": "mimo-v2-omni",
|
||||
}
|
||||
|
||||
# OpenRouter app attribution headers
|
||||
_OR_HEADERS = {
|
||||
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
|
||||
@@ -1687,16 +1691,18 @@ def resolve_vision_provider_client(
|
||||
if sync_client is not None:
|
||||
return _finalize(main_provider, sync_client, default_model)
|
||||
else:
|
||||
# Exotic provider (DeepSeek, Alibaba, named custom, etc.)
|
||||
# Exotic provider (DeepSeek, Alibaba, Xiaomi, named custom, etc.)
|
||||
# Use provider-specific vision model if available, otherwise main model.
|
||||
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
|
||||
rpc_client, rpc_model = resolve_provider_client(
|
||||
main_provider, main_model)
|
||||
main_provider, vision_model)
|
||||
if rpc_client is not None:
|
||||
logger.info(
|
||||
"Vision auto-detect: using active provider %s (%s)",
|
||||
main_provider, rpc_model or main_model,
|
||||
main_provider, rpc_model or vision_model,
|
||||
)
|
||||
return _finalize(
|
||||
main_provider, rpc_client, rpc_model or main_model)
|
||||
main_provider, rpc_client, rpc_model or vision_model)
|
||||
|
||||
# Fall back through aggregators.
|
||||
for candidate in _VISION_AUTO_PROVIDER_ORDER:
|
||||
@@ -1958,8 +1964,8 @@ def _resolve_task_provider_model(
|
||||
|
||||
Priority:
|
||||
1. Explicit provider/model/base_url/api_key args (always win)
|
||||
2. Env var overrides (AUXILIARY_{TASK}_*, CONTEXT_{TASK}_*)
|
||||
3. Config file (auxiliary.{task}.* or compression.*)
|
||||
2. Config file (auxiliary.{task}.* or compression.*)
|
||||
3. Env var overrides (backward-compat: AUXILIARY_{TASK}_*, CONTEXT_{TASK}_*)
|
||||
4. "auto" (full auto-detection chain)
|
||||
|
||||
Returns (provider, model, base_url, api_key, api_mode) where model may
|
||||
@@ -2002,10 +2008,11 @@ def _resolve_task_provider_model(
|
||||
_sbu = comp.get("summary_base_url") or ""
|
||||
cfg_base_url = cfg_base_url or _sbu.strip() or None
|
||||
|
||||
# Env vars are backward-compat fallback only — config.yaml is primary.
|
||||
env_model = _get_auxiliary_env_override(task, "MODEL") if task else None
|
||||
env_api_mode = _get_auxiliary_env_override(task, "API_MODE") if task else None
|
||||
resolved_model = model or env_model or cfg_model
|
||||
resolved_api_mode = env_api_mode or cfg_api_mode
|
||||
resolved_model = model or cfg_model or env_model
|
||||
resolved_api_mode = cfg_api_mode or env_api_mode
|
||||
|
||||
if base_url:
|
||||
return "custom", resolved_model, base_url, api_key, resolved_api_mode
|
||||
@@ -2013,19 +2020,23 @@ def _resolve_task_provider_model(
|
||||
return provider, resolved_model, base_url, api_key, resolved_api_mode
|
||||
|
||||
if task:
|
||||
# Config.yaml is the primary source for per-task overrides.
|
||||
if cfg_base_url:
|
||||
return "custom", resolved_model, cfg_base_url, cfg_api_key, resolved_api_mode
|
||||
if cfg_provider and cfg_provider != "auto":
|
||||
return cfg_provider, resolved_model, None, None, resolved_api_mode
|
||||
|
||||
# Env vars are backward-compat fallback for users who haven't
|
||||
# migrated to config.yaml yet.
|
||||
env_base_url = _get_auxiliary_env_override(task, "BASE_URL")
|
||||
env_api_key = _get_auxiliary_env_override(task, "API_KEY")
|
||||
if env_base_url:
|
||||
return "custom", resolved_model, env_base_url, env_api_key or cfg_api_key, resolved_api_mode
|
||||
return "custom", resolved_model, env_base_url, env_api_key, resolved_api_mode
|
||||
|
||||
env_provider = _get_auxiliary_provider(task)
|
||||
if env_provider != "auto":
|
||||
return env_provider, resolved_model, None, None, resolved_api_mode
|
||||
|
||||
if cfg_base_url:
|
||||
return "custom", resolved_model, cfg_base_url, cfg_api_key, resolved_api_mode
|
||||
if cfg_provider and cfg_provider != "auto":
|
||||
return cfg_provider, resolved_model, None, None, resolved_api_mode
|
||||
return "auto", resolved_model, None, None, resolved_api_mode
|
||||
|
||||
return "auto", resolved_model, None, None, resolved_api_mode
|
||||
|
||||
@@ -27,12 +27,14 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||
"gemini", "zai", "kimi-coding", "minimax", "minimax-cn", "anthropic", "deepseek",
|
||||
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
|
||||
"qwen-oauth",
|
||||
"xiaomi",
|
||||
"custom", "local",
|
||||
# Common aliases
|
||||
"google", "google-gemini", "google-ai-studio",
|
||||
"glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
|
||||
"github-models", "kimi", "moonshot", "claude", "deep-seek",
|
||||
"opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
|
||||
"mimo", "xiaomi-mimo",
|
||||
"qwen-portal",
|
||||
})
|
||||
|
||||
@@ -149,9 +151,10 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"moonshotai/Kimi-K2.5": 262144,
|
||||
"moonshotai/Kimi-K2-Thinking": 262144,
|
||||
"MiniMaxAI/MiniMax-M2.5": 204800,
|
||||
"XiaomiMiMo/MiMo-V2-Flash": 32768,
|
||||
"mimo-v2-pro": 1048576,
|
||||
"mimo-v2-omni": 1048576,
|
||||
"XiaomiMiMo/MiMo-V2-Flash": 256000,
|
||||
"mimo-v2-pro": 1000000,
|
||||
"mimo-v2-omni": 256000,
|
||||
"mimo-v2-flash": 256000,
|
||||
"zai-org/GLM-5": 202752,
|
||||
}
|
||||
|
||||
@@ -211,6 +214,8 @@ _URL_TO_PROVIDER: Dict[str, str] = {
|
||||
"api.fireworks.ai": "fireworks",
|
||||
"opencode.ai": "opencode-go",
|
||||
"api.x.ai": "xai",
|
||||
"api.xiaomimimo.com": "xiaomi",
|
||||
"xiaomimimo.com": "xiaomi",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -161,6 +161,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
|
||||
"gemini": "google",
|
||||
"google": "google",
|
||||
"xai": "xai",
|
||||
"xiaomi": "xiaomi",
|
||||
"nvidia": "nvidia",
|
||||
"groq": "groq",
|
||||
"mistral": "mistral",
|
||||
@@ -383,7 +384,14 @@ def get_model_capabilities(provider: str, model: str) -> Optional[ModelCapabilit
|
||||
|
||||
# Extract capability flags (default to False if missing)
|
||||
supports_tools = bool(entry.get("tool_call", False))
|
||||
supports_vision = bool(entry.get("attachment", False))
|
||||
# Vision: check both the `attachment` flag and `modalities.input` for "image".
|
||||
# Some models (e.g. gemma-4) list image in input modalities but not attachment.
|
||||
input_mods = entry.get("modalities", {})
|
||||
if isinstance(input_mods, dict):
|
||||
input_mods = input_mods.get("input", [])
|
||||
else:
|
||||
input_mods = []
|
||||
supports_vision = bool(entry.get("attachment", False)) or "image" in input_mods
|
||||
supports_reasoning = bool(entry.get("reasoning", False))
|
||||
|
||||
# Extract limits
|
||||
|
||||
@@ -24,6 +24,7 @@ model:
|
||||
# "minimax" - MiniMax global (requires: MINIMAX_API_KEY)
|
||||
# "minimax-cn" - MiniMax China (requires: MINIMAX_CN_API_KEY)
|
||||
# "huggingface" - Hugging Face Inference (requires: HF_TOKEN)
|
||||
# "xiaomi" - Xiaomi MiMo (requires: XIAOMI_API_KEY)
|
||||
# "kilocode" - KiloCode gateway (requires: KILOCODE_API_KEY)
|
||||
# "ai-gateway" - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
|
||||
#
|
||||
|
||||
@@ -190,7 +190,7 @@ class StreamingConfig:
|
||||
"""Configuration for real-time token streaming to messaging platforms."""
|
||||
enabled: bool = False
|
||||
transport: str = "edit" # "edit" (progressive editMessageText) or "off"
|
||||
edit_interval: float = 0.3 # Seconds between message edits
|
||||
edit_interval: float = 1.0 # Seconds between message edits (Telegram rate-limits at ~1/s)
|
||||
buffer_threshold: int = 40 # Chars before forcing an edit
|
||||
cursor: str = " ▉" # Cursor shown during streaming
|
||||
|
||||
@@ -210,7 +210,7 @@ class StreamingConfig:
|
||||
return cls(
|
||||
enabled=data.get("enabled", False),
|
||||
transport=data.get("transport", "edit"),
|
||||
edit_interval=float(data.get("edit_interval", 0.3)),
|
||||
edit_interval=float(data.get("edit_interval", 1.0)),
|
||||
buffer_threshold=int(data.get("buffer_threshold", 40)),
|
||||
cursor=data.get("cursor", " ▉"),
|
||||
)
|
||||
|
||||
@@ -352,7 +352,16 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
from mautrix.crypto import OlmMachine
|
||||
from mautrix.crypto.store import MemoryCryptoStore
|
||||
|
||||
crypto_store = MemoryCryptoStore()
|
||||
# account_id and pickle_key are required by mautrix ≥0.21.
|
||||
# Use the Matrix user ID as account_id for stable identity.
|
||||
# pickle_key secures in-memory serialisation; derive from
|
||||
# the same user_id:device_id pair used for the on-disk HMAC.
|
||||
_acct_id = self._user_id or "hermes"
|
||||
_pickle_key = f"{_acct_id}:{self._device_id}"
|
||||
crypto_store = MemoryCryptoStore(
|
||||
account_id=_acct_id,
|
||||
pickle_key=_pickle_key,
|
||||
)
|
||||
|
||||
# Restore persisted crypto state from a previous run.
|
||||
# Uses HMAC to verify integrity before unpickling.
|
||||
@@ -418,6 +427,11 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
if isinstance(sync_data, dict):
|
||||
rooms_join = sync_data.get("rooms", {}).get("join", {})
|
||||
self._joined_rooms = set(rooms_join.keys())
|
||||
# Store the next_batch token so incremental syncs start
|
||||
# from where the initial sync left off.
|
||||
nb = sync_data.get("next_batch")
|
||||
if nb:
|
||||
await client.sync_store.put_next_batch(nb)
|
||||
logger.info(
|
||||
"Matrix: initial sync complete, joined %d rooms",
|
||||
len(self._joined_rooms),
|
||||
@@ -809,19 +823,40 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
|
||||
async def _sync_loop(self) -> None:
|
||||
"""Continuously sync with the homeserver."""
|
||||
client = self._client
|
||||
# Resume from the token stored during the initial sync.
|
||||
next_batch = await client.sync_store.get_next_batch()
|
||||
while not self._closing:
|
||||
try:
|
||||
sync_data = await self._client.sync(timeout=30000)
|
||||
sync_data = await client.sync(
|
||||
since=next_batch, timeout=30000,
|
||||
)
|
||||
if isinstance(sync_data, dict):
|
||||
# Update joined rooms from sync response.
|
||||
rooms_join = sync_data.get("rooms", {}).get("join", {})
|
||||
if rooms_join:
|
||||
self._joined_rooms.update(rooms_join.keys())
|
||||
|
||||
# Share keys periodically if E2EE is enabled.
|
||||
if self._encryption and getattr(self._client, "crypto", None):
|
||||
# Advance the sync token so the next request is
|
||||
# incremental instead of a full initial sync.
|
||||
nb = sync_data.get("next_batch")
|
||||
if nb:
|
||||
next_batch = nb
|
||||
await client.sync_store.put_next_batch(nb)
|
||||
|
||||
# Dispatch events to registered handlers so that
|
||||
# _on_room_message / _on_reaction / _on_invite fire.
|
||||
try:
|
||||
await self._client.crypto.share_keys()
|
||||
tasks = client.handle_sync(sync_data)
|
||||
if tasks:
|
||||
await asyncio.gather(*tasks)
|
||||
except Exception as exc:
|
||||
logger.warning("Matrix: sync event dispatch error: %s", exc)
|
||||
|
||||
# Share keys periodically if E2EE is enabled.
|
||||
if self._encryption and getattr(client, "crypto", None):
|
||||
try:
|
||||
await client.crypto.share_keys()
|
||||
except Exception as exc:
|
||||
logger.warning("Matrix: E2EE key share failed: %s", exc)
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ _NEW_SEGMENT = object()
|
||||
@dataclass
|
||||
class StreamConsumerConfig:
|
||||
"""Runtime config for a single stream consumer instance."""
|
||||
edit_interval: float = 0.3
|
||||
edit_interval: float = 1.0
|
||||
buffer_threshold: int = 40
|
||||
cursor: str = " ▉"
|
||||
|
||||
@@ -56,6 +56,10 @@ class GatewayStreamConsumer:
|
||||
await task # wait for final edit
|
||||
"""
|
||||
|
||||
# After this many consecutive flood-control failures, permanently disable
|
||||
# progressive edits for the remainder of the stream.
|
||||
_MAX_FLOOD_STRIKES = 3
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
adapter: Any,
|
||||
@@ -76,6 +80,8 @@ class GatewayStreamConsumer:
|
||||
self._last_sent_text = "" # Track last-sent text to skip redundant edits
|
||||
self._fallback_final_send = False
|
||||
self._fallback_prefix = ""
|
||||
self._flood_strikes = 0 # Consecutive flood-control edit failures
|
||||
self._current_edit_interval = self.cfg.edit_interval # Adaptive backoff
|
||||
|
||||
@property
|
||||
def already_sent(self) -> bool:
|
||||
@@ -129,7 +135,7 @@ class GatewayStreamConsumer:
|
||||
should_edit = (
|
||||
got_done
|
||||
or got_segment_break
|
||||
or (elapsed >= self.cfg.edit_interval
|
||||
or (elapsed >= self._current_edit_interval
|
||||
and self._accumulated)
|
||||
or len(self._accumulated) >= self.cfg.buffer_threshold
|
||||
)
|
||||
@@ -173,12 +179,13 @@ class GatewayStreamConsumer:
|
||||
if split_at < _safe_limit // 2:
|
||||
split_at = _safe_limit
|
||||
chunk = self._accumulated[:split_at]
|
||||
await self._send_or_edit(chunk)
|
||||
if self._fallback_final_send:
|
||||
# Edit failed while attempting to split an oversized
|
||||
# message. Keep the full accumulated text intact so
|
||||
# the fallback final-send path can deliver the
|
||||
# remaining continuation without dropping content.
|
||||
ok = await self._send_or_edit(chunk)
|
||||
if self._fallback_final_send or not ok:
|
||||
# Edit failed (or backed off due to flood control)
|
||||
# while attempting to split an oversized message.
|
||||
# Keep the full accumulated text intact so the
|
||||
# fallback final-send path can deliver the remaining
|
||||
# continuation without dropping content.
|
||||
break
|
||||
self._accumulated = self._accumulated[split_at:].lstrip("\n")
|
||||
self._message_id = None
|
||||
@@ -322,7 +329,10 @@ class GatewayStreamConsumer:
|
||||
return chunks
|
||||
|
||||
async def _send_fallback_final(self, text: str) -> None:
|
||||
"""Send the final continuation after streaming edits stop working."""
|
||||
"""Send the final continuation after streaming edits stop working.
|
||||
|
||||
Retries each chunk once on flood-control failures with a short delay.
|
||||
"""
|
||||
final_text = self._clean_for_display(text)
|
||||
continuation = self._continuation_text(final_text)
|
||||
self._fallback_final_send = False
|
||||
@@ -339,12 +349,25 @@ class GatewayStreamConsumer:
|
||||
last_successful_chunk = ""
|
||||
sent_any_chunk = False
|
||||
for chunk in chunks:
|
||||
result = await self.adapter.send(
|
||||
chat_id=self.chat_id,
|
||||
content=chunk,
|
||||
metadata=self.metadata,
|
||||
)
|
||||
if not result.success:
|
||||
# Try sending with one retry on flood-control errors.
|
||||
result = None
|
||||
for attempt in range(2):
|
||||
result = await self.adapter.send(
|
||||
chat_id=self.chat_id,
|
||||
content=chunk,
|
||||
metadata=self.metadata,
|
||||
)
|
||||
if result.success:
|
||||
break
|
||||
if attempt == 0 and self._is_flood_error(result):
|
||||
logger.debug(
|
||||
"Flood control on fallback send, retrying in 3s"
|
||||
)
|
||||
await asyncio.sleep(3.0)
|
||||
else:
|
||||
break # non-flood error or second attempt failed
|
||||
|
||||
if not result or not result.success:
|
||||
if sent_any_chunk:
|
||||
# Some continuation text already reached the user. Suppress
|
||||
# the base gateway final-send path so we don't resend the
|
||||
@@ -370,20 +393,52 @@ class GatewayStreamConsumer:
|
||||
self._last_sent_text = chunks[-1]
|
||||
self._fallback_prefix = ""
|
||||
|
||||
async def _send_or_edit(self, text: str) -> None:
|
||||
"""Send or edit the streaming message."""
|
||||
def _is_flood_error(self, result) -> bool:
|
||||
"""Check if a SendResult failure is due to flood control / rate limiting."""
|
||||
err = getattr(result, "error", "") or ""
|
||||
err_lower = err.lower()
|
||||
return "flood" in err_lower or "retry after" in err_lower or "rate" in err_lower
|
||||
|
||||
async def _try_strip_cursor(self) -> None:
|
||||
"""Best-effort edit to remove the cursor from the last visible message.
|
||||
|
||||
Called when entering fallback mode so the user doesn't see a stuck
|
||||
cursor (▉) in the partial message.
|
||||
"""
|
||||
if not self._message_id or self._message_id == "__no_edit__":
|
||||
return
|
||||
prefix = self._visible_prefix()
|
||||
if not prefix or not prefix.strip():
|
||||
return
|
||||
try:
|
||||
await self.adapter.edit_message(
|
||||
chat_id=self.chat_id,
|
||||
message_id=self._message_id,
|
||||
content=prefix,
|
||||
)
|
||||
self._last_sent_text = prefix
|
||||
except Exception:
|
||||
pass # best-effort — don't let this block the fallback path
|
||||
|
||||
async def _send_or_edit(self, text: str) -> bool:
|
||||
"""Send or edit the streaming message.
|
||||
|
||||
Returns True if the text was successfully delivered (sent or edited),
|
||||
False otherwise. Callers like the overflow split loop use this to
|
||||
decide whether to advance past the delivered chunk.
|
||||
"""
|
||||
# Strip MEDIA: directives so they don't appear as visible text.
|
||||
# Media files are delivered as native attachments after the stream
|
||||
# finishes (via _deliver_media_from_response in gateway/run.py).
|
||||
text = self._clean_for_display(text)
|
||||
if not text.strip():
|
||||
return
|
||||
return True # nothing to send is "success"
|
||||
try:
|
||||
if self._message_id is not None:
|
||||
if self._edit_supported:
|
||||
# Skip if text is identical to what we last sent
|
||||
if text == self._last_sent_text:
|
||||
return
|
||||
return True
|
||||
# Edit existing message
|
||||
result = await self.adapter.edit_message(
|
||||
chat_id=self.chat_id,
|
||||
@@ -393,19 +448,52 @@ class GatewayStreamConsumer:
|
||||
if result.success:
|
||||
self._already_sent = True
|
||||
self._last_sent_text = text
|
||||
# Successful edit — reset flood strike counter
|
||||
self._flood_strikes = 0
|
||||
return True
|
||||
else:
|
||||
# If an edit fails mid-stream (especially Telegram flood control),
|
||||
# stop progressive edits and send only the missing tail once the
|
||||
# Edit failed. If this looks like flood control / rate
|
||||
# limiting, use adaptive backoff: double the edit interval
|
||||
# and retry on the next cycle. Only permanently disable
|
||||
# edits after _MAX_FLOOD_STRIKES consecutive failures.
|
||||
if self._is_flood_error(result):
|
||||
self._flood_strikes += 1
|
||||
self._current_edit_interval = min(
|
||||
self._current_edit_interval * 2, 10.0,
|
||||
)
|
||||
logger.debug(
|
||||
"Flood control on edit (strike %d/%d), "
|
||||
"backoff interval → %.1fs",
|
||||
self._flood_strikes,
|
||||
self._MAX_FLOOD_STRIKES,
|
||||
self._current_edit_interval,
|
||||
)
|
||||
if self._flood_strikes < self._MAX_FLOOD_STRIKES:
|
||||
# Don't disable edits yet — just slow down.
|
||||
# Update _last_edit_time so the next edit
|
||||
# respects the new interval.
|
||||
self._last_edit_time = time.monotonic()
|
||||
return False
|
||||
|
||||
# Non-flood error OR flood strikes exhausted: enter
|
||||
# fallback mode — send only the missing tail once the
|
||||
# final response is available.
|
||||
logger.debug("Edit failed, disabling streaming for this adapter")
|
||||
logger.debug(
|
||||
"Edit failed (strikes=%d), entering fallback mode",
|
||||
self._flood_strikes,
|
||||
)
|
||||
self._fallback_prefix = self._visible_prefix()
|
||||
self._fallback_final_send = True
|
||||
self._edit_supported = False
|
||||
self._already_sent = True
|
||||
# Best-effort: strip the cursor from the last visible
|
||||
# message so the user doesn't see a stuck ▉.
|
||||
await self._try_strip_cursor()
|
||||
return False
|
||||
else:
|
||||
# Editing not supported — skip intermediate updates.
|
||||
# The final response will be sent by the fallback path.
|
||||
pass
|
||||
return False
|
||||
else:
|
||||
# First message — send new
|
||||
result = await self.adapter.send(
|
||||
@@ -417,6 +505,7 @@ class GatewayStreamConsumer:
|
||||
self._message_id = result.message_id
|
||||
self._already_sent = True
|
||||
self._last_sent_text = text
|
||||
return True
|
||||
elif result.success:
|
||||
# Platform accepted the message but returned no message_id
|
||||
# (e.g. Signal). Can't edit without an ID — switch to
|
||||
@@ -428,8 +517,11 @@ class GatewayStreamConsumer:
|
||||
self._fallback_final_send = True
|
||||
# Sentinel prevents re-entering this branch on every delta
|
||||
self._message_id = "__no_edit__"
|
||||
return True # platform accepted, just can't edit
|
||||
else:
|
||||
# Initial send failed — disable streaming for this session
|
||||
self._edit_supported = False
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error("Stream send/edit error: %s", e)
|
||||
return False
|
||||
|
||||
@@ -250,6 +250,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
api_key_env_vars=("HF_TOKEN",),
|
||||
base_url_env_var="HF_BASE_URL",
|
||||
),
|
||||
"xiaomi": ProviderConfig(
|
||||
id="xiaomi",
|
||||
name="Xiaomi MiMo",
|
||||
auth_type="api_key",
|
||||
inference_base_url="https://api.xiaomimimo.com/v1",
|
||||
api_key_env_vars=("XIAOMI_API_KEY",),
|
||||
base_url_env_var="XIAOMI_BASE_URL",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -908,6 +916,7 @@ def resolve_provider(
|
||||
"opencode": "opencode-zen", "zen": "opencode-zen",
|
||||
"qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth",
|
||||
"hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
|
||||
"mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
|
||||
"go": "opencode-go", "opencode-go-sub": "opencode-go",
|
||||
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
|
||||
# Local server aliases — route through the generic custom provider
|
||||
|
||||
@@ -32,7 +32,6 @@ _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
|
||||
_EXTRA_ENV_KEYS = frozenset({
|
||||
"OPENAI_API_KEY", "OPENAI_BASE_URL",
|
||||
"ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN",
|
||||
"AUXILIARY_VISION_MODEL",
|
||||
"DISCORD_HOME_CHANNEL", "TELEGRAM_HOME_CHANNEL",
|
||||
"SIGNAL_ACCOUNT", "SIGNAL_HTTP_URL",
|
||||
"SIGNAL_ALLOWED_USERS", "SIGNAL_GROUP_ALLOWED_USERS",
|
||||
@@ -381,7 +380,7 @@ DEFAULT_CONFIG = {
|
||||
"model": "", # e.g. "google/gemini-2.5-flash", "gpt-4o"
|
||||
"base_url": "", # direct OpenAI-compatible endpoint (takes precedence over provider)
|
||||
"api_key": "", # API key for base_url (falls back to OPENAI_API_KEY)
|
||||
"timeout": 30, # seconds — LLM API call timeout; increase for slow local vision models
|
||||
"timeout": 120, # seconds — LLM API call timeout; vision payloads need generous timeout
|
||||
"download_timeout": 30, # seconds — image HTTP download timeout; increase for slow connections
|
||||
},
|
||||
"web_extract": {
|
||||
@@ -868,6 +867,21 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"XIAOMI_API_KEY": {
|
||||
"description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
|
||||
"prompt": "Xiaomi MiMo API Key",
|
||||
"url": "https://platform.xiaomimimo.com",
|
||||
"password": True,
|
||||
"category": "provider",
|
||||
},
|
||||
"XIAOMI_BASE_URL": {
|
||||
"description": "Xiaomi MiMo base URL override (default: https://api.xiaomimimo.com/v1)",
|
||||
"prompt": "Xiaomi base URL (leave empty for default)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
|
||||
# ── Tool API keys ──
|
||||
"EXA_API_KEY": {
|
||||
|
||||
@@ -51,6 +51,7 @@ _PROVIDER_ENV_HINTS = (
|
||||
"AI_GATEWAY_API_KEY",
|
||||
"OPENCODE_ZEN_API_KEY",
|
||||
"OPENCODE_GO_API_KEY",
|
||||
"XIAOMI_API_KEY",
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -934,6 +934,7 @@ def select_provider_and_model(args=None):
|
||||
"kilocode": "Kilo Code",
|
||||
"alibaba": "Alibaba Cloud (DashScope)",
|
||||
"huggingface": "Hugging Face",
|
||||
"xiaomi": "Xiaomi MiMo",
|
||||
"custom": "Custom endpoint",
|
||||
}
|
||||
active_label = provider_labels.get(active, active) if active else "none"
|
||||
@@ -966,6 +967,7 @@ def select_provider_and_model(args=None):
|
||||
("opencode-go", "OpenCode Go (open models, $10/month subscription)"),
|
||||
("ai-gateway", "AI Gateway (Vercel — 200+ models, pay-per-use)"),
|
||||
("alibaba", "Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
|
||||
("xiaomi", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
|
||||
]
|
||||
|
||||
def _named_custom_provider_map(cfg) -> dict[str, dict[str, str]]:
|
||||
@@ -1077,7 +1079,7 @@ def select_provider_and_model(args=None):
|
||||
_model_flow_anthropic(config, current_model)
|
||||
elif selected_provider == "kimi-coding":
|
||||
_model_flow_kimi(config, current_model)
|
||||
elif selected_provider in ("gemini", "zai", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface"):
|
||||
elif selected_provider in ("gemini", "zai", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi"):
|
||||
_model_flow_api_key_provider(config, selected_provider, current_model)
|
||||
|
||||
# ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────
|
||||
@@ -4357,7 +4359,7 @@ For more help on a command:
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--provider",
|
||||
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode"],
|
||||
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "xiaomi"],
|
||||
default=None,
|
||||
help="Inference provider (default: auto)"
|
||||
)
|
||||
|
||||
@@ -92,6 +92,7 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
|
||||
"minimax-cn",
|
||||
"alibaba",
|
||||
"qwen-oauth",
|
||||
"xiaomi",
|
||||
"custom",
|
||||
})
|
||||
|
||||
|
||||
@@ -56,6 +56,18 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
|
||||
|
||||
_openrouter_catalog_cache: list[tuple[str, str]] | None = None
|
||||
|
||||
|
||||
def _codex_curated_models() -> list[str]:
|
||||
"""Derive the openai-codex curated list from codex_models.py.
|
||||
|
||||
Single source of truth: DEFAULT_CODEX_MODELS + forward-compat synthesis.
|
||||
This keeps the gateway /model picker in sync with the CLI `hermes model`
|
||||
flow without maintaining a separate static list.
|
||||
"""
|
||||
from hermes_cli.codex_models import DEFAULT_CODEX_MODELS, _add_forward_compat_models
|
||||
return _add_forward_compat_models(list(DEFAULT_CODEX_MODELS))
|
||||
|
||||
|
||||
_PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"nous": [
|
||||
"anthropic/claude-opus-4.6",
|
||||
@@ -86,14 +98,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"openai/gpt-5.4-pro",
|
||||
"openai/gpt-5.4-nano",
|
||||
],
|
||||
"openai-codex": [
|
||||
"gpt-5.4",
|
||||
"gpt-5.4-mini",
|
||||
"gpt-5.3-codex",
|
||||
"gpt-5.2-codex",
|
||||
"gpt-5.1-codex-mini",
|
||||
"gpt-5.1-codex-max",
|
||||
],
|
||||
"openai-codex": _codex_curated_models(),
|
||||
"copilot-acp": [
|
||||
"copilot-acp",
|
||||
],
|
||||
@@ -183,6 +188,11 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"deepseek-chat",
|
||||
"deepseek-reasoner",
|
||||
],
|
||||
"xiaomi": [
|
||||
"mimo-v2-pro",
|
||||
"mimo-v2-omni",
|
||||
"mimo-v2-flash",
|
||||
],
|
||||
"opencode-zen": [
|
||||
"gpt-5.4-pro",
|
||||
"gpt-5.4",
|
||||
@@ -488,6 +498,7 @@ _PROVIDER_LABELS = {
|
||||
"alibaba": "Alibaba Cloud (DashScope)",
|
||||
"qwen-oauth": "Qwen OAuth (Portal)",
|
||||
"huggingface": "Hugging Face",
|
||||
"xiaomi": "Xiaomi MiMo",
|
||||
"custom": "Custom endpoint",
|
||||
}
|
||||
|
||||
@@ -530,6 +541,8 @@ _PROVIDER_ALIASES = {
|
||||
"hf": "huggingface",
|
||||
"hugging-face": "huggingface",
|
||||
"huggingface-hub": "huggingface",
|
||||
"mimo": "xiaomi",
|
||||
"xiaomi-mimo": "xiaomi",
|
||||
}
|
||||
|
||||
|
||||
@@ -814,7 +827,7 @@ def list_available_providers() -> list[dict[str, str]]:
|
||||
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
|
||||
"gemini", "huggingface",
|
||||
"zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic", "alibaba",
|
||||
"qwen-oauth",
|
||||
"qwen-oauth", "xiaomi",
|
||||
"opencode-zen", "opencode-go",
|
||||
"ai-gateway", "deepseek", "custom",
|
||||
]
|
||||
|
||||
@@ -132,6 +132,10 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
|
||||
base_url_override="https://api.x.ai/v1",
|
||||
base_url_env_var="XAI_BASE_URL",
|
||||
),
|
||||
"xiaomi": HermesOverlay(
|
||||
transport="openai_chat",
|
||||
base_url_env_var="XIAOMI_BASE_URL",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -222,6 +226,10 @@ ALIASES: Dict[str, str] = {
|
||||
"hugging-face": "huggingface",
|
||||
"huggingface-hub": "huggingface",
|
||||
|
||||
# xiaomi
|
||||
"mimo": "xiaomi",
|
||||
"xiaomi-mimo": "xiaomi",
|
||||
|
||||
# Local server aliases → virtual "local" concept (resolved via user config)
|
||||
"lmstudio": "lmstudio",
|
||||
"lm-studio": "lmstudio",
|
||||
@@ -242,6 +250,7 @@ _LABEL_OVERRIDES: Dict[str, str] = {
|
||||
"nous": "Nous Portal",
|
||||
"openai-codex": "OpenAI Codex",
|
||||
"copilot-acp": "GitHub Copilot ACP",
|
||||
"xiaomi": "Xiaomi MiMo",
|
||||
"local": "Local endpoint",
|
||||
}
|
||||
|
||||
|
||||
37
run_agent.py
37
run_agent.py
@@ -700,10 +700,14 @@ class AIAgent:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Direct OpenAI sessions use the Responses API path. GPT-5.x tool
|
||||
# calls with reasoning are rejected on /v1/chat/completions, and
|
||||
# Hermes is a tool-using client by default.
|
||||
if self.api_mode == "chat_completions" and self._is_direct_openai_url():
|
||||
# GPT-5.x models require the Responses API path — they are rejected
|
||||
# on /v1/chat/completions by both OpenAI and OpenRouter. Also
|
||||
# auto-upgrade for direct OpenAI URLs (api.openai.com) since all
|
||||
# newer tool-calling models prefer Responses there.
|
||||
if self.api_mode == "chat_completions" and (
|
||||
self._is_direct_openai_url()
|
||||
or self._model_requires_responses_api(self.model)
|
||||
):
|
||||
self.api_mode = "codex_responses"
|
||||
|
||||
# Pre-warm OpenRouter model metadata cache in a background thread.
|
||||
@@ -1702,6 +1706,21 @@ class AIAgent:
|
||||
"""Return True when the base URL targets OpenRouter."""
|
||||
return "openrouter" in self._base_url_lower
|
||||
|
||||
@staticmethod
|
||||
def _model_requires_responses_api(model: str) -> bool:
|
||||
"""Return True for models that require the Responses API path.
|
||||
|
||||
GPT-5.x models are rejected on /v1/chat/completions by both
|
||||
OpenAI and OpenRouter (error: ``unsupported_api_for_model``).
|
||||
Detect these so the correct api_mode is set regardless of
|
||||
which provider is serving the model.
|
||||
"""
|
||||
m = model.lower()
|
||||
# Strip vendor prefix (e.g. "openai/gpt-5.4" → "gpt-5.4")
|
||||
if "/" in m:
|
||||
m = m.rsplit("/", 1)[-1]
|
||||
return m.startswith("gpt-5")
|
||||
|
||||
def _max_tokens_param(self, value: int) -> dict:
|
||||
"""Return the correct max tokens kwarg for the current provider.
|
||||
|
||||
@@ -5251,7 +5270,7 @@ class AIAgent:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Determine api_mode from provider / base URL
|
||||
# Determine api_mode from provider / base URL / model
|
||||
fb_api_mode = "chat_completions"
|
||||
fb_base_url = str(fb_client.base_url)
|
||||
if fb_provider == "openai-codex":
|
||||
@@ -5260,6 +5279,10 @@ class AIAgent:
|
||||
fb_api_mode = "anthropic_messages"
|
||||
elif self._is_direct_openai_url(fb_base_url):
|
||||
fb_api_mode = "codex_responses"
|
||||
elif self._model_requires_responses_api(fb_model):
|
||||
# GPT-5.x models need Responses API on every provider
|
||||
# (OpenRouter, Copilot, direct OpenAI, etc.)
|
||||
fb_api_mode = "codex_responses"
|
||||
|
||||
old_model = self.model
|
||||
self.model = fb_model
|
||||
@@ -5348,8 +5371,8 @@ class AIAgent:
|
||||
to the fallback provider for every subsequent turn. Calling this at
|
||||
the top of ``run_conversation()`` makes fallback turn-scoped.
|
||||
|
||||
The gateway creates a fresh agent per message so this is a no-op
|
||||
there (``_fallback_activated`` is always False at turn start).
|
||||
The gateway caches agents across messages (``_agent_cache`` in
|
||||
``gateway/run.py``), so this restoration IS needed there too.
|
||||
"""
|
||||
if not self._fallback_activated:
|
||||
return False
|
||||
|
||||
@@ -7,6 +7,7 @@ from agent.models_dev import (
|
||||
PROVIDER_TO_MODELS_DEV,
|
||||
_extract_context,
|
||||
fetch_models_dev,
|
||||
get_model_capabilities,
|
||||
lookup_models_dev_context,
|
||||
)
|
||||
|
||||
@@ -195,3 +196,88 @@ class TestFetchModelsDev:
|
||||
result = fetch_models_dev()
|
||||
mock_get.assert_not_called()
|
||||
assert result == SAMPLE_REGISTRY
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_model_capabilities — vision via modalities.input
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
CAPS_REGISTRY = {
|
||||
"google": {
|
||||
"id": "google",
|
||||
"models": {
|
||||
"gemma-4-31b-it": {
|
||||
"id": "gemma-4-31b-it",
|
||||
"attachment": False,
|
||||
"tool_call": True,
|
||||
"modalities": {"input": ["text", "image"]},
|
||||
"limit": {"context": 128000, "output": 8192},
|
||||
},
|
||||
"gemma-3-1b": {
|
||||
"id": "gemma-3-1b",
|
||||
"tool_call": True,
|
||||
"limit": {"context": 32000, "output": 8192},
|
||||
},
|
||||
},
|
||||
},
|
||||
"anthropic": {
|
||||
"id": "anthropic",
|
||||
"models": {
|
||||
"claude-sonnet-4": {
|
||||
"id": "claude-sonnet-4",
|
||||
"attachment": True,
|
||||
"tool_call": True,
|
||||
"limit": {"context": 200000, "output": 64000},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class TestGetModelCapabilities:
|
||||
"""Tests for get_model_capabilities vision detection."""
|
||||
|
||||
def test_vision_from_attachment_flag(self):
|
||||
"""Models with attachment=True should report supports_vision=True."""
|
||||
with patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY):
|
||||
caps = get_model_capabilities("anthropic", "claude-sonnet-4")
|
||||
assert caps is not None
|
||||
assert caps.supports_vision is True
|
||||
|
||||
def test_vision_from_modalities_input_image(self):
|
||||
"""Models with 'image' in modalities.input but attachment=False should
|
||||
still report supports_vision=True (the core fix in this PR)."""
|
||||
with patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY):
|
||||
caps = get_model_capabilities("google", "gemma-4-31b-it")
|
||||
assert caps is not None
|
||||
assert caps.supports_vision is True
|
||||
|
||||
def test_no_vision_without_attachment_or_modalities(self):
|
||||
"""Models with neither attachment nor image modality should be non-vision."""
|
||||
with patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY):
|
||||
caps = get_model_capabilities("google", "gemma-3-1b")
|
||||
assert caps is not None
|
||||
assert caps.supports_vision is False
|
||||
|
||||
def test_modalities_non_dict_handled(self):
|
||||
"""Non-dict modalities field should not crash."""
|
||||
registry = {
|
||||
"google": {"id": "google", "models": {
|
||||
"weird-model": {
|
||||
"id": "weird-model",
|
||||
"modalities": "text", # not a dict
|
||||
"limit": {"context": 200000, "output": 8192},
|
||||
},
|
||||
}},
|
||||
}
|
||||
with patch("agent.models_dev.fetch_models_dev", return_value=registry):
|
||||
caps = get_model_capabilities("gemini", "weird-model")
|
||||
assert caps is not None
|
||||
assert caps.supports_vision is False
|
||||
|
||||
def test_model_not_found_returns_none(self):
|
||||
"""Unknown model should return None."""
|
||||
with patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY):
|
||||
caps = get_model_capabilities("anthropic", "nonexistent-model")
|
||||
assert caps is None
|
||||
|
||||
@@ -157,7 +157,9 @@ def _make_fake_mautrix():
|
||||
mautrix_crypto_store = types.ModuleType("mautrix.crypto.store")
|
||||
|
||||
class MemoryCryptoStore:
|
||||
pass
|
||||
def __init__(self, account_id="", pickle_key=""):
|
||||
self.account_id = account_id
|
||||
self.pickle_key = pickle_key
|
||||
|
||||
mautrix_crypto_store.MemoryCryptoStore = MemoryCryptoStore
|
||||
|
||||
@@ -1041,20 +1043,28 @@ class TestMatrixSyncLoop:
|
||||
call_count += 1
|
||||
if call_count >= 1:
|
||||
adapter._closing = True
|
||||
return {"rooms": {"join": {"!room:example.org": {}}}}
|
||||
return {"rooms": {"join": {"!room:example.org": {}}}, "next_batch": "s1234"}
|
||||
|
||||
mock_crypto = MagicMock()
|
||||
mock_crypto.share_keys = AsyncMock()
|
||||
|
||||
mock_sync_store = MagicMock()
|
||||
mock_sync_store.get_next_batch = AsyncMock(return_value=None)
|
||||
mock_sync_store.put_next_batch = AsyncMock()
|
||||
|
||||
fake_client = MagicMock()
|
||||
fake_client.sync = AsyncMock(side_effect=_sync_once)
|
||||
fake_client.crypto = mock_crypto
|
||||
fake_client.sync_store = mock_sync_store
|
||||
fake_client.handle_sync = MagicMock(return_value=[])
|
||||
adapter._client = fake_client
|
||||
|
||||
await adapter._sync_loop()
|
||||
|
||||
fake_client.sync.assert_awaited_once()
|
||||
mock_crypto.share_keys.assert_awaited_once()
|
||||
fake_client.handle_sync.assert_called_once()
|
||||
mock_sync_store.put_next_batch.assert_awaited_once_with("s1234")
|
||||
|
||||
|
||||
class TestMatrixEncryptedSendFallback:
|
||||
|
||||
327
tests/hermes_cli/test_xiaomi_provider.py
Normal file
327
tests/hermes_cli/test_xiaomi_provider.py
Normal file
@@ -0,0 +1,327 @@
|
||||
"""Tests for Xiaomi MiMo provider support."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import types
|
||||
|
||||
import pytest
|
||||
|
||||
# Ensure dotenv doesn't interfere
|
||||
if "dotenv" not in sys.modules:
|
||||
fake_dotenv = types.ModuleType("dotenv")
|
||||
fake_dotenv.load_dotenv = lambda *args, **kwargs: None
|
||||
sys.modules["dotenv"] = fake_dotenv
|
||||
|
||||
from hermes_cli.auth import (
|
||||
PROVIDER_REGISTRY,
|
||||
resolve_provider,
|
||||
get_api_key_provider_status,
|
||||
resolve_api_key_provider_credentials,
|
||||
AuthError,
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Provider Registry
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestXiaomiProviderRegistry:
|
||||
"""Verify Xiaomi is registered correctly in the PROVIDER_REGISTRY."""
|
||||
|
||||
def test_registered(self):
|
||||
assert "xiaomi" in PROVIDER_REGISTRY
|
||||
|
||||
def test_name(self):
|
||||
assert PROVIDER_REGISTRY["xiaomi"].name == "Xiaomi MiMo"
|
||||
|
||||
def test_auth_type(self):
|
||||
assert PROVIDER_REGISTRY["xiaomi"].auth_type == "api_key"
|
||||
|
||||
def test_inference_base_url(self):
|
||||
assert PROVIDER_REGISTRY["xiaomi"].inference_base_url == "https://api.xiaomimimo.com/v1"
|
||||
|
||||
def test_api_key_env_vars(self):
|
||||
assert PROVIDER_REGISTRY["xiaomi"].api_key_env_vars == ("XIAOMI_API_KEY",)
|
||||
|
||||
def test_base_url_env_var(self):
|
||||
assert PROVIDER_REGISTRY["xiaomi"].base_url_env_var == "XIAOMI_BASE_URL"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Aliases
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestXiaomiAliases:
|
||||
"""All aliases should resolve to 'xiaomi'."""
|
||||
|
||||
@pytest.mark.parametrize("alias", [
|
||||
"xiaomi", "mimo", "xiaomi-mimo",
|
||||
])
|
||||
def test_alias_resolves(self, alias, monkeypatch):
|
||||
# Clear env to avoid auto-detection interfering
|
||||
for key in ("XIAOMI_API_KEY",):
|
||||
monkeypatch.delenv(key, raising=False)
|
||||
monkeypatch.setenv("XIAOMI_API_KEY", "sk-test-key-12345678")
|
||||
assert resolve_provider(alias) == "xiaomi"
|
||||
|
||||
def test_normalize_provider_models_py(self):
|
||||
from hermes_cli.models import normalize_provider
|
||||
assert normalize_provider("mimo") == "xiaomi"
|
||||
assert normalize_provider("xiaomi-mimo") == "xiaomi"
|
||||
|
||||
def test_normalize_provider_providers_py(self):
|
||||
from hermes_cli.providers import normalize_provider
|
||||
assert normalize_provider("mimo") == "xiaomi"
|
||||
assert normalize_provider("xiaomi-mimo") == "xiaomi"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Auto-detection
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestXiaomiAutoDetection:
|
||||
"""Setting XIAOMI_API_KEY should auto-detect the provider."""
|
||||
|
||||
def test_auto_detect(self, monkeypatch):
|
||||
# Clear all other provider env vars
|
||||
for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
|
||||
"DEEPSEEK_API_KEY", "GOOGLE_API_KEY", "GEMINI_API_KEY",
|
||||
"DASHSCOPE_API_KEY", "XAI_API_KEY", "KIMI_API_KEY",
|
||||
"MINIMAX_API_KEY", "AI_GATEWAY_API_KEY", "KILOCODE_API_KEY",
|
||||
"HF_TOKEN", "GLM_API_KEY", "COPILOT_GITHUB_TOKEN",
|
||||
"GH_TOKEN", "GITHUB_TOKEN", "MINIMAX_CN_API_KEY"):
|
||||
monkeypatch.delenv(var, raising=False)
|
||||
monkeypatch.setenv("XIAOMI_API_KEY", "sk-xiaomi-test-12345678")
|
||||
provider = resolve_provider("auto")
|
||||
assert provider == "xiaomi"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Credentials
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestXiaomiCredentials:
|
||||
"""Test credential resolution for the xiaomi provider."""
|
||||
|
||||
def test_status_configured(self, monkeypatch):
|
||||
monkeypatch.setenv("XIAOMI_API_KEY", "sk-test-12345678")
|
||||
status = get_api_key_provider_status("xiaomi")
|
||||
assert status["configured"]
|
||||
|
||||
def test_status_not_configured(self, monkeypatch):
|
||||
monkeypatch.delenv("XIAOMI_API_KEY", raising=False)
|
||||
status = get_api_key_provider_status("xiaomi")
|
||||
assert not status["configured"]
|
||||
|
||||
def test_resolve_credentials(self, monkeypatch):
|
||||
monkeypatch.setenv("XIAOMI_API_KEY", "sk-test-12345678")
|
||||
monkeypatch.delenv("XIAOMI_BASE_URL", raising=False)
|
||||
creds = resolve_api_key_provider_credentials("xiaomi")
|
||||
assert creds["api_key"] == "sk-test-12345678"
|
||||
assert creds["base_url"] == "https://api.xiaomimimo.com/v1"
|
||||
|
||||
def test_custom_base_url_override(self, monkeypatch):
|
||||
monkeypatch.setenv("XIAOMI_API_KEY", "sk-test-12345678")
|
||||
monkeypatch.setenv("XIAOMI_BASE_URL", "https://custom.xiaomi.example/v1")
|
||||
creds = resolve_api_key_provider_credentials("xiaomi")
|
||||
assert creds["base_url"] == "https://custom.xiaomi.example/v1"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Model catalog (dynamic — no static list)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestXiaomiModelCatalog:
|
||||
"""Xiaomi uses dynamic model discovery via models.dev."""
|
||||
|
||||
def test_models_dev_mapping(self):
|
||||
from agent.models_dev import PROVIDER_TO_MODELS_DEV
|
||||
assert PROVIDER_TO_MODELS_DEV["xiaomi"] == "xiaomi"
|
||||
|
||||
def test_static_model_list_fallback(self):
|
||||
"""Static _PROVIDER_MODELS fallback must exist for model picker."""
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
assert "xiaomi" in _PROVIDER_MODELS
|
||||
models = _PROVIDER_MODELS["xiaomi"]
|
||||
assert "mimo-v2-pro" in models
|
||||
assert "mimo-v2-omni" in models
|
||||
assert "mimo-v2-flash" in models
|
||||
|
||||
def test_list_agentic_models_mock(self, monkeypatch):
|
||||
"""When models.dev returns Xiaomi data, list_agentic_models should return models."""
|
||||
from agent import models_dev as md
|
||||
|
||||
fake_data = {
|
||||
"xiaomi": {
|
||||
"name": "Xiaomi",
|
||||
"api": "https://api.xiaomimimo.com/v1",
|
||||
"env": ["XIAOMI_API_KEY"],
|
||||
"models": {
|
||||
"mimo-v2-pro": {
|
||||
"limit": {"context": 1000000},
|
||||
"tool_call": True,
|
||||
},
|
||||
"mimo-v2-omni": {
|
||||
"limit": {"context": 256000},
|
||||
"tool_call": True,
|
||||
},
|
||||
"mimo-v2-flash": {
|
||||
"limit": {"context": 256000},
|
||||
"tool_call": True,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
monkeypatch.setattr(md, "fetch_models_dev", lambda: fake_data)
|
||||
|
||||
result = md.list_agentic_models("xiaomi")
|
||||
assert "mimo-v2-pro" in result
|
||||
assert "mimo-v2-flash" in result
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Normalization
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestXiaomiNormalization:
|
||||
"""Model name normalization — Xiaomi is a direct provider."""
|
||||
|
||||
def test_vendor_prefix_mapping(self):
|
||||
from hermes_cli.model_normalize import _VENDOR_PREFIXES
|
||||
assert _VENDOR_PREFIXES.get("mimo") == "xiaomi"
|
||||
|
||||
def test_matching_prefix_strip(self):
|
||||
"""xiaomi/mimo-v2-pro should normalize to mimo-v2-pro for direct API."""
|
||||
from hermes_cli.model_normalize import _MATCHING_PREFIX_STRIP_PROVIDERS
|
||||
assert "xiaomi" in _MATCHING_PREFIX_STRIP_PROVIDERS
|
||||
|
||||
def test_normalize_strips_provider_prefix(self):
|
||||
from hermes_cli.model_normalize import normalize_model_for_provider
|
||||
result = normalize_model_for_provider("xiaomi/mimo-v2-pro", "xiaomi")
|
||||
assert result == "mimo-v2-pro"
|
||||
|
||||
def test_normalize_bare_name_unchanged(self):
|
||||
from hermes_cli.model_normalize import normalize_model_for_provider
|
||||
result = normalize_model_for_provider("mimo-v2-pro", "xiaomi")
|
||||
assert result == "mimo-v2-pro"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# URL mapping
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestXiaomiURLMapping:
|
||||
"""Test URL → provider inference for Xiaomi endpoints."""
|
||||
|
||||
def test_url_to_provider(self):
|
||||
from agent.model_metadata import _URL_TO_PROVIDER
|
||||
assert _URL_TO_PROVIDER.get("api.xiaomimimo.com") == "xiaomi"
|
||||
|
||||
def test_provider_prefixes(self):
|
||||
from agent.model_metadata import _PROVIDER_PREFIXES
|
||||
assert "xiaomi" in _PROVIDER_PREFIXES
|
||||
assert "mimo" in _PROVIDER_PREFIXES
|
||||
assert "xiaomi-mimo" in _PROVIDER_PREFIXES
|
||||
|
||||
def test_infer_from_url(self):
|
||||
from agent.model_metadata import _infer_provider_from_url
|
||||
assert _infer_provider_from_url("https://api.xiaomimimo.com/v1") == "xiaomi"
|
||||
|
||||
def test_infer_from_regional_urls(self):
|
||||
"""Regional token-plan endpoints should also resolve to xiaomi."""
|
||||
from agent.model_metadata import _infer_provider_from_url
|
||||
assert _infer_provider_from_url("https://token-plan-ams.xiaomimimo.com/v1") == "xiaomi"
|
||||
assert _infer_provider_from_url("https://token-plan-cn.xiaomimimo.com/v1") == "xiaomi"
|
||||
assert _infer_provider_from_url("https://token-plan-sgp.xiaomimimo.com/v1") == "xiaomi"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# providers.py
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestXiaomiProvidersModule:
|
||||
"""Test Xiaomi in the unified providers module."""
|
||||
|
||||
def test_overlay_exists(self):
|
||||
from hermes_cli.providers import HERMES_OVERLAYS
|
||||
assert "xiaomi" in HERMES_OVERLAYS
|
||||
overlay = HERMES_OVERLAYS["xiaomi"]
|
||||
assert overlay.transport == "openai_chat"
|
||||
assert overlay.base_url_env_var == "XIAOMI_BASE_URL"
|
||||
assert not overlay.is_aggregator
|
||||
|
||||
def test_alias_resolves(self):
|
||||
from hermes_cli.providers import normalize_provider
|
||||
assert normalize_provider("mimo") == "xiaomi"
|
||||
assert normalize_provider("xiaomi-mimo") == "xiaomi"
|
||||
|
||||
def test_label(self):
|
||||
from hermes_cli.providers import get_label
|
||||
assert get_label("xiaomi") == "Xiaomi MiMo"
|
||||
|
||||
def test_get_provider(self):
|
||||
pdef = None
|
||||
try:
|
||||
from hermes_cli.providers import get_provider
|
||||
pdef = get_provider("xiaomi")
|
||||
except Exception:
|
||||
pass
|
||||
if pdef is not None:
|
||||
assert pdef.id == "xiaomi"
|
||||
assert pdef.transport == "openai_chat"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Auxiliary client
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestXiaomiAuxiliary:
|
||||
"""Xiaomi auxiliary routing: vision → omni, non-vision → user's main model, never flash."""
|
||||
|
||||
def test_no_flash_in_aux_models(self):
|
||||
"""mimo-v2-flash must NEVER be used for automatic aux routing."""
|
||||
from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
|
||||
assert "xiaomi" not in _API_KEY_PROVIDER_AUX_MODELS
|
||||
|
||||
def test_vision_model_override(self):
|
||||
"""Xiaomi vision tasks should use mimo-v2-omni (multimodal), not the main model."""
|
||||
from agent.auxiliary_client import _PROVIDER_VISION_MODELS
|
||||
assert "xiaomi" in _PROVIDER_VISION_MODELS
|
||||
assert _PROVIDER_VISION_MODELS["xiaomi"] == "mimo-v2-omni"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Agent init (no SyntaxError, correct api_mode)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestXiaomiDoctor:
|
||||
"""Verify hermes doctor recognizes Xiaomi env vars."""
|
||||
|
||||
def test_provider_env_hints(self):
|
||||
from hermes_cli.doctor import _PROVIDER_ENV_HINTS
|
||||
assert "XIAOMI_API_KEY" in _PROVIDER_ENV_HINTS
|
||||
|
||||
|
||||
class TestXiaomiAgentInit:
|
||||
"""Verify the agent can be constructed with xiaomi provider without errors."""
|
||||
|
||||
def test_no_syntax_errors(self):
|
||||
"""Importing run_agent with xiaomi should not raise."""
|
||||
import importlib
|
||||
importlib.import_module("run_agent")
|
||||
|
||||
def test_api_mode_is_chat_completions(self):
|
||||
from hermes_cli.providers import HERMES_OVERLAYS, TRANSPORT_TO_API_MODE
|
||||
overlay = HERMES_OVERLAYS["xiaomi"]
|
||||
api_mode = TRANSPORT_TO_API_MODE[overlay.transport]
|
||||
assert api_mode == "chat_completions"
|
||||
@@ -222,6 +222,12 @@ def test_api_mode_normalizes_provider_case(monkeypatch):
|
||||
|
||||
|
||||
def test_api_mode_respects_explicit_openrouter_provider_over_codex_url(monkeypatch):
|
||||
"""GPT-5.x models need codex_responses even on OpenRouter.
|
||||
|
||||
OpenRouter rejects GPT-5 models on /v1/chat/completions with
|
||||
``unsupported_api_for_model``. The model-level check overrides
|
||||
the provider default.
|
||||
"""
|
||||
_patch_agent_bootstrap(monkeypatch)
|
||||
agent = run_agent.AIAgent(
|
||||
model="gpt-5-codex",
|
||||
@@ -233,7 +239,7 @@ def test_api_mode_respects_explicit_openrouter_provider_over_codex_url(monkeypat
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
)
|
||||
assert agent.api_mode == "chat_completions"
|
||||
assert agent.api_mode == "codex_responses"
|
||||
assert agent.provider == "openrouter"
|
||||
|
||||
|
||||
|
||||
@@ -15,6 +15,10 @@ from tools.vision_tools import (
|
||||
_handle_vision_analyze,
|
||||
_determine_mime_type,
|
||||
_image_to_base64_data_url,
|
||||
_resize_image_for_vision,
|
||||
_is_image_size_error,
|
||||
_MAX_BASE64_BYTES,
|
||||
_RESIZE_TARGET_BYTES,
|
||||
vision_analyze_tool,
|
||||
check_vision_requirements,
|
||||
get_debug_session_info,
|
||||
@@ -590,11 +594,13 @@ class TestBase64SizeLimit:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_oversized_image_rejected_before_api_call(self, tmp_path):
|
||||
"""Images exceeding 5 MB base64 should fail with a clear size error."""
|
||||
"""Images exceeding the 20 MB hard limit should fail with a clear error."""
|
||||
img = tmp_path / "huge.png"
|
||||
img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * (4 * 1024 * 1024))
|
||||
|
||||
with patch("tools.vision_tools.async_call_llm", new_callable=AsyncMock) as mock_llm:
|
||||
# Patch the hard limit to a small value so the test runs fast.
|
||||
with patch("tools.vision_tools._MAX_BASE64_BYTES", 1000), \
|
||||
patch("tools.vision_tools.async_call_llm", new_callable=AsyncMock) as mock_llm:
|
||||
result = json.loads(await vision_analyze_tool(str(img), "describe this"))
|
||||
|
||||
assert result["success"] is False
|
||||
@@ -686,3 +692,124 @@ class TestVisionRegistration:
|
||||
|
||||
entry = registry._tools.get("vision_analyze")
|
||||
assert callable(entry.handler)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _resize_image_for_vision — auto-resize oversized images
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestResizeImageForVision:
|
||||
"""Tests for the auto-resize function."""
|
||||
|
||||
def test_small_image_returned_as_is(self, tmp_path):
|
||||
"""Images under the limit should be returned unchanged."""
|
||||
# Create a small 10x10 red PNG
|
||||
try:
|
||||
from PIL import Image
|
||||
except ImportError:
|
||||
pytest.skip("Pillow not installed")
|
||||
img = Image.new("RGB", (10, 10), (255, 0, 0))
|
||||
path = tmp_path / "small.png"
|
||||
img.save(path, "PNG")
|
||||
|
||||
result = _resize_image_for_vision(path, mime_type="image/png")
|
||||
assert result.startswith("data:image/png;base64,")
|
||||
assert len(result) < _MAX_BASE64_BYTES
|
||||
|
||||
def test_large_image_is_resized(self, tmp_path):
|
||||
"""Images over the default target should be auto-resized to fit."""
|
||||
try:
|
||||
from PIL import Image
|
||||
except ImportError:
|
||||
pytest.skip("Pillow not installed")
|
||||
# Create a large image that will exceed 5 MB in base64
|
||||
# A 4000x4000 uncompressed PNG will be large
|
||||
img = Image.new("RGB", (4000, 4000), (128, 200, 50))
|
||||
path = tmp_path / "large.png"
|
||||
img.save(path, "PNG")
|
||||
|
||||
result = _resize_image_for_vision(path, mime_type="image/png")
|
||||
assert result.startswith("data:image/png;base64,")
|
||||
# Default target is _RESIZE_TARGET_BYTES (5 MB), not _MAX_BASE64_BYTES (20 MB)
|
||||
assert len(result) <= _RESIZE_TARGET_BYTES
|
||||
|
||||
def test_custom_max_bytes(self, tmp_path):
|
||||
"""The max_base64_bytes parameter should be respected."""
|
||||
try:
|
||||
from PIL import Image
|
||||
except ImportError:
|
||||
pytest.skip("Pillow not installed")
|
||||
img = Image.new("RGB", (200, 200), (0, 128, 255))
|
||||
path = tmp_path / "medium.png"
|
||||
img.save(path, "PNG")
|
||||
|
||||
# Set a very low limit to force resizing
|
||||
result = _resize_image_for_vision(path, max_base64_bytes=500)
|
||||
# Should still return a valid data URL
|
||||
assert result.startswith("data:image/")
|
||||
|
||||
def test_jpeg_output_for_non_png(self, tmp_path):
|
||||
"""Non-PNG images should be resized as JPEG."""
|
||||
try:
|
||||
from PIL import Image
|
||||
except ImportError:
|
||||
pytest.skip("Pillow not installed")
|
||||
img = Image.new("RGB", (2000, 2000), (255, 128, 0))
|
||||
path = tmp_path / "photo.jpg"
|
||||
img.save(path, "JPEG", quality=95)
|
||||
|
||||
result = _resize_image_for_vision(path, mime_type="image/jpeg",
|
||||
max_base64_bytes=50_000)
|
||||
assert result.startswith("data:image/jpeg;base64,")
|
||||
|
||||
def test_constants_sane(self):
|
||||
"""Hard limit should be larger than resize target."""
|
||||
assert _MAX_BASE64_BYTES == 20 * 1024 * 1024
|
||||
assert _RESIZE_TARGET_BYTES == 5 * 1024 * 1024
|
||||
assert _MAX_BASE64_BYTES > _RESIZE_TARGET_BYTES
|
||||
|
||||
def test_no_pillow_returns_original(self, tmp_path):
|
||||
"""Without Pillow, oversized images should be returned as-is."""
|
||||
# Create a dummy file
|
||||
path = tmp_path / "test.png"
|
||||
# Write enough bytes to exceed a tiny limit
|
||||
path.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 1000)
|
||||
|
||||
with patch("tools.vision_tools._image_to_base64_data_url") as mock_b64:
|
||||
# Simulate a large base64 result
|
||||
mock_b64.return_value = "data:image/png;base64," + "A" * 200
|
||||
with patch.dict("sys.modules", {"PIL": None, "PIL.Image": None}):
|
||||
result = _resize_image_for_vision(path, max_base64_bytes=100)
|
||||
# Should return the original (oversized) data url
|
||||
assert len(result) > 100
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _is_image_size_error — detect size-related API errors
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestIsImageSizeError:
|
||||
"""Tests for the size-error detection helper."""
|
||||
|
||||
def test_too_large_message(self):
|
||||
assert _is_image_size_error(Exception("Request payload too large"))
|
||||
|
||||
def test_413_status(self):
|
||||
assert _is_image_size_error(Exception("HTTP 413 Payload Too Large"))
|
||||
|
||||
def test_invalid_request(self):
|
||||
assert _is_image_size_error(Exception("invalid_request_error: image too big"))
|
||||
|
||||
def test_exceeds_limit(self):
|
||||
assert _is_image_size_error(Exception("Image exceeds maximum size"))
|
||||
|
||||
def test_unrelated_error(self):
|
||||
assert not _is_image_size_error(Exception("Connection refused"))
|
||||
|
||||
def test_auth_error(self):
|
||||
assert not _is_image_size_error(Exception("401 Unauthorized"))
|
||||
|
||||
def test_empty_message(self):
|
||||
assert not _is_image_size_error(Exception(""))
|
||||
|
||||
@@ -1873,10 +1873,10 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
||||
),
|
||||
}, ensure_ascii=False)
|
||||
|
||||
# Read and convert to base64
|
||||
image_data = screenshot_path.read_bytes()
|
||||
image_base64 = base64.b64encode(image_data).decode("ascii")
|
||||
data_url = f"data:image/png;base64,{image_base64}"
|
||||
# Convert screenshot to base64 at full resolution.
|
||||
_screenshot_bytes = screenshot_path.read_bytes()
|
||||
_screenshot_b64 = base64.b64encode(_screenshot_bytes).decode("ascii")
|
||||
data_url = f"data:image/png;base64,{_screenshot_b64}"
|
||||
|
||||
vision_prompt = (
|
||||
f"You are analyzing a screenshot of a web browser.\n\n"
|
||||
@@ -1890,7 +1890,7 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
||||
# Use the centralized LLM router
|
||||
vision_model = _get_vision_model()
|
||||
logger.debug("browser_vision: analysing screenshot (%d bytes)",
|
||||
len(image_data))
|
||||
len(_screenshot_bytes))
|
||||
|
||||
# Read vision timeout from config (auxiliary.vision.timeout), default 120s.
|
||||
# Local vision models (llama.cpp, ollama) can take well over 30s for
|
||||
@@ -1922,7 +1922,27 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
||||
}
|
||||
if vision_model:
|
||||
call_kwargs["model"] = vision_model
|
||||
response = call_llm(**call_kwargs)
|
||||
# Try full-size screenshot; on size-related rejection, downscale and retry.
|
||||
try:
|
||||
response = call_llm(**call_kwargs)
|
||||
except Exception as _api_err:
|
||||
from tools.vision_tools import (
|
||||
_is_image_size_error, _resize_image_for_vision, _RESIZE_TARGET_BYTES,
|
||||
)
|
||||
if (_is_image_size_error(_api_err)
|
||||
and len(data_url) > _RESIZE_TARGET_BYTES):
|
||||
logger.info(
|
||||
"Vision API rejected screenshot (%.1f MB); "
|
||||
"auto-resizing to ~%.0f MB and retrying...",
|
||||
len(data_url) / (1024 * 1024),
|
||||
_RESIZE_TARGET_BYTES / (1024 * 1024),
|
||||
)
|
||||
data_url = _resize_image_for_vision(
|
||||
screenshot_path, mime_type="image/png")
|
||||
call_kwargs["messages"][0]["content"][1]["image_url"]["url"] = data_url
|
||||
response = call_llm(**call_kwargs)
|
||||
else:
|
||||
raise
|
||||
|
||||
analysis = (response.choices[0].message.content or "").strip()
|
||||
# Redact secrets the vision LLM may have read from the screenshot.
|
||||
|
||||
@@ -277,6 +277,120 @@ def _image_to_base64_data_url(image_path: Path, mime_type: Optional[str] = None)
|
||||
return data_url
|
||||
|
||||
|
||||
# Hard limit for vision API payloads (20 MB) — matches the most restrictive
|
||||
# major provider (Gemini inline data limit). Images above this are rejected.
|
||||
_MAX_BASE64_BYTES = 20 * 1024 * 1024
|
||||
|
||||
# Target size when auto-resizing on API failure (5 MB). After a provider
|
||||
# rejects an image, we downscale to this target and retry once.
|
||||
_RESIZE_TARGET_BYTES = 5 * 1024 * 1024
|
||||
|
||||
|
||||
def _is_image_size_error(error: Exception) -> bool:
|
||||
"""Detect if an API error is related to image or payload size."""
|
||||
err_str = str(error).lower()
|
||||
return any(hint in err_str for hint in (
|
||||
"too large", "payload", "413", "content_too_large",
|
||||
"request_too_large", "image_url", "invalid_request",
|
||||
"exceeds", "size limit",
|
||||
))
|
||||
|
||||
|
||||
def _resize_image_for_vision(image_path: Path, mime_type: Optional[str] = None,
|
||||
max_base64_bytes: int = _RESIZE_TARGET_BYTES) -> str:
|
||||
"""Convert an image to a base64 data URL, auto-resizing if too large.
|
||||
|
||||
Tries Pillow first to progressively downscale oversized images. If Pillow
|
||||
is not installed or resizing still exceeds the limit, falls back to the raw
|
||||
bytes and lets the caller handle the size check.
|
||||
|
||||
Returns the base64 data URL string.
|
||||
"""
|
||||
# Quick file-size estimate: base64 expands by ~4/3, plus data URL header.
|
||||
# Skip the expensive full-read + encode if Pillow can resize directly.
|
||||
file_size = image_path.stat().st_size
|
||||
estimated_b64 = (file_size * 4) // 3 + 100 # ~header overhead
|
||||
if estimated_b64 <= max_base64_bytes:
|
||||
# Small enough — just encode directly.
|
||||
data_url = _image_to_base64_data_url(image_path, mime_type=mime_type)
|
||||
if len(data_url) <= max_base64_bytes:
|
||||
return data_url
|
||||
else:
|
||||
data_url = None # defer full encode; try Pillow resize first
|
||||
|
||||
# Attempt auto-resize with Pillow (soft dependency)
|
||||
try:
|
||||
from PIL import Image
|
||||
import io as _io
|
||||
except ImportError:
|
||||
logger.info("Pillow not installed — cannot auto-resize oversized image")
|
||||
if data_url is None:
|
||||
data_url = _image_to_base64_data_url(image_path, mime_type=mime_type)
|
||||
return data_url # caller will raise the size error
|
||||
|
||||
logger.info("Image file is %.1f MB (estimated base64 %.1f MB, limit %.1f MB), auto-resizing...",
|
||||
file_size / (1024 * 1024), estimated_b64 / (1024 * 1024),
|
||||
max_base64_bytes / (1024 * 1024))
|
||||
|
||||
mime = mime_type or _determine_mime_type(image_path)
|
||||
# Choose output format: JPEG for photos (smaller), PNG for transparency
|
||||
pil_format = "PNG" if mime == "image/png" else "JPEG"
|
||||
out_mime = "image/png" if pil_format == "PNG" else "image/jpeg"
|
||||
|
||||
try:
|
||||
img = Image.open(image_path)
|
||||
except Exception as exc:
|
||||
logger.info("Pillow cannot open image for resizing: %s", exc)
|
||||
if data_url is None:
|
||||
data_url = _image_to_base64_data_url(image_path, mime_type=mime_type)
|
||||
return data_url # fall through to size-check in caller
|
||||
# Convert RGBA to RGB for JPEG output
|
||||
if pil_format == "JPEG" and img.mode in ("RGBA", "P"):
|
||||
img = img.convert("RGB")
|
||||
|
||||
# Strategy: halve dimensions until base64 fits, up to 4 rounds.
|
||||
# For JPEG, also try reducing quality at each size step.
|
||||
# For PNG, quality is irrelevant — only dimension reduction helps.
|
||||
quality_steps = (85, 70, 50) if pil_format == "JPEG" else (None,)
|
||||
prev_dims = (img.width, img.height)
|
||||
candidate = None # will be set on first loop iteration
|
||||
|
||||
for attempt in range(5):
|
||||
if attempt > 0:
|
||||
new_w = max(img.width // 2, 64)
|
||||
new_h = max(img.height // 2, 64)
|
||||
# Stop if dimensions can't shrink further
|
||||
if (new_w, new_h) == prev_dims:
|
||||
break
|
||||
img = img.resize((new_w, new_h), Image.LANCZOS)
|
||||
prev_dims = (new_w, new_h)
|
||||
logger.info("Resized to %dx%d (attempt %d)", new_w, new_h, attempt)
|
||||
|
||||
for q in quality_steps:
|
||||
buf = _io.BytesIO()
|
||||
save_kwargs = {"format": pil_format}
|
||||
if q is not None:
|
||||
save_kwargs["quality"] = q
|
||||
img.save(buf, **save_kwargs)
|
||||
encoded = base64.b64encode(buf.getvalue()).decode("ascii")
|
||||
candidate = f"data:{out_mime};base64,{encoded}"
|
||||
if len(candidate) <= max_base64_bytes:
|
||||
logger.info("Auto-resized image fits: %.1f MB (quality=%s, %dx%d)",
|
||||
len(candidate) / (1024 * 1024), q,
|
||||
img.width, img.height)
|
||||
return candidate
|
||||
|
||||
# If we still can't get it small enough, return the best attempt
|
||||
# and let the caller decide
|
||||
if candidate is not None:
|
||||
logger.warning("Auto-resize could not fit image under %.1f MB (best: %.1f MB)",
|
||||
max_base64_bytes / (1024 * 1024), len(candidate) / (1024 * 1024))
|
||||
return candidate
|
||||
|
||||
# Shouldn't reach here, but fall back to full encode
|
||||
return data_url or _image_to_base64_data_url(image_path, mime_type=mime_type)
|
||||
|
||||
|
||||
async def vision_analyze_tool(
|
||||
image_url: str,
|
||||
user_prompt: str,
|
||||
@@ -376,24 +490,27 @@ async def vision_analyze_tool(
|
||||
if not detected_mime_type:
|
||||
raise ValueError("Only real image files are supported for vision analysis.")
|
||||
|
||||
# Convert image to base64 data URL
|
||||
# Convert image to base64 — send at full resolution first.
|
||||
# If the provider rejects it as too large, we auto-resize and retry.
|
||||
logger.info("Converting image to base64...")
|
||||
image_data_url = _image_to_base64_data_url(temp_image_path, mime_type=detected_mime_type)
|
||||
# Calculate size in KB for better readability
|
||||
data_size_kb = len(image_data_url) / 1024
|
||||
logger.info("Image converted to base64 (%.1f KB)", data_size_kb)
|
||||
|
||||
# Pre-flight size check: most vision APIs cap base64 payloads at 5 MB.
|
||||
# Reject early with a clear message instead of a cryptic provider 400.
|
||||
_MAX_BASE64_BYTES = 5 * 1024 * 1024 # 5 MB
|
||||
# The data URL includes the header (e.g. "data:image/jpeg;base64,") which
|
||||
# is negligible, but measure the full string to be safe.
|
||||
# Hard limit (20 MB) — no provider accepts payloads this large.
|
||||
if len(image_data_url) > _MAX_BASE64_BYTES:
|
||||
raise ValueError(
|
||||
f"Image too large for vision API: base64 payload is "
|
||||
f"{len(image_data_url) / (1024 * 1024):.1f} MB (limit 5 MB). "
|
||||
f"Resize or compress the image and try again."
|
||||
)
|
||||
# Try to resize down to 5 MB before giving up.
|
||||
image_data_url = _resize_image_for_vision(
|
||||
temp_image_path, mime_type=detected_mime_type)
|
||||
if len(image_data_url) > _MAX_BASE64_BYTES:
|
||||
raise ValueError(
|
||||
f"Image too large for vision API: base64 payload is "
|
||||
f"{len(image_data_url) / (1024 * 1024):.1f} MB "
|
||||
f"(limit {_MAX_BASE64_BYTES / (1024 * 1024):.0f} MB) "
|
||||
f"even after resizing. "
|
||||
f"Install Pillow (`pip install Pillow`) for better auto-resize, "
|
||||
f"or compress the image manually."
|
||||
)
|
||||
|
||||
debug_call_data["image_size_bytes"] = image_size_bytes
|
||||
|
||||
@@ -442,7 +559,24 @@ async def vision_analyze_tool(
|
||||
}
|
||||
if model:
|
||||
call_kwargs["model"] = model
|
||||
response = await async_call_llm(**call_kwargs)
|
||||
# Try full-size image first; on size-related rejection, downscale and retry.
|
||||
try:
|
||||
response = await async_call_llm(**call_kwargs)
|
||||
except Exception as _api_err:
|
||||
if (_is_image_size_error(_api_err)
|
||||
and len(image_data_url) > _RESIZE_TARGET_BYTES):
|
||||
logger.info(
|
||||
"API rejected image (%.1f MB, likely too large); "
|
||||
"auto-resizing to ~%.0f MB and retrying...",
|
||||
len(image_data_url) / (1024 * 1024),
|
||||
_RESIZE_TARGET_BYTES / (1024 * 1024),
|
||||
)
|
||||
image_data_url = _resize_image_for_vision(
|
||||
temp_image_path, mime_type=detected_mime_type)
|
||||
messages[0]["content"][1]["image_url"]["url"] = image_data_url
|
||||
response = await async_call_llm(**call_kwargs)
|
||||
else:
|
||||
raise
|
||||
|
||||
# Extract the analysis — fall back to reasoning if content is empty
|
||||
analysis = extract_content_or_reasoning(response)
|
||||
@@ -498,8 +632,8 @@ async def vision_analyze_tool(
|
||||
elif "invalid_request" in err_str or "image_url" in err_str:
|
||||
analysis = (
|
||||
"The vision API rejected the image. This can happen when the "
|
||||
"image is too large, in an unsupported format, or corrupted. "
|
||||
"Try a smaller JPEG/PNG (under 3.5 MB) and retry. "
|
||||
"image is in an unsupported format, corrupted, or still too "
|
||||
"large after auto-resize. Try a smaller JPEG/PNG and retry. "
|
||||
f"Error: {e}"
|
||||
)
|
||||
else:
|
||||
|
||||
@@ -143,6 +143,10 @@ to find the parent assistant message, keeping groups intact.
|
||||
|
||||
### Phase 3: Generate Structured Summary
|
||||
|
||||
:::warning Summary model context length
|
||||
The summary model must have a context window **at least as large** as the main agent model's. The entire middle section is sent to the summary model in a single `call_llm(task="compression")` call. If the summary model's context is smaller, the API returns a context-length error — `_generate_summary()` catches it, logs a warning, and returns `None`. The compressor then drops the middle turns **without a summary**, silently losing conversation context. This is the most common cause of degraded compaction quality.
|
||||
:::
|
||||
|
||||
The middle turns are summarized using the auxiliary LLM with a structured
|
||||
template:
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro
|
||||
| **MiniMax China** | `MINIMAX_CN_API_KEY` in `~/.hermes/.env` (provider: `minimax-cn`) |
|
||||
| **Alibaba Cloud** | `DASHSCOPE_API_KEY` in `~/.hermes/.env` (provider: `alibaba`, aliases: `dashscope`, `qwen`) |
|
||||
| **Kilo Code** | `KILOCODE_API_KEY` in `~/.hermes/.env` (provider: `kilocode`) |
|
||||
| **Xiaomi MiMo** | `XIAOMI_API_KEY` in `~/.hermes/.env` (provider: `xiaomi`, aliases: `mimo`, `xiaomi-mimo`) |
|
||||
| **OpenCode Zen** | `OPENCODE_ZEN_API_KEY` in `~/.hermes/.env` (provider: `opencode-zen`) |
|
||||
| **OpenCode Go** | `OPENCODE_GO_API_KEY` in `~/.hermes/.env` (provider: `opencode-go`) |
|
||||
| **DeepSeek** | `DEEPSEEK_API_KEY` in `~/.hermes/.env` (provider: `deepseek`) |
|
||||
@@ -157,16 +158,20 @@ hermes chat --provider minimax-cn --model MiniMax-M2.7
|
||||
# Alibaba Cloud / DashScope (Qwen models)
|
||||
hermes chat --provider alibaba --model qwen3.5-plus
|
||||
# Requires: DASHSCOPE_API_KEY in ~/.hermes/.env
|
||||
|
||||
# Xiaomi MiMo
|
||||
hermes chat --provider xiaomi --model mimo-v2-pro
|
||||
# Requires: XIAOMI_API_KEY in ~/.hermes/.env
|
||||
```
|
||||
|
||||
Or set the provider permanently in `config.yaml`:
|
||||
```yaml
|
||||
model:
|
||||
provider: "zai" # or: kimi-coding, minimax, minimax-cn, alibaba
|
||||
provider: "zai" # or: kimi-coding, minimax, minimax-cn, alibaba, xiaomi
|
||||
default: "glm-5"
|
||||
```
|
||||
|
||||
Base URLs can be overridden with `GLM_BASE_URL`, `KIMI_BASE_URL`, `MINIMAX_BASE_URL`, `MINIMAX_CN_BASE_URL`, or `DASHSCOPE_BASE_URL` environment variables.
|
||||
Base URLs can be overridden with `GLM_BASE_URL`, `KIMI_BASE_URL`, `MINIMAX_BASE_URL`, `MINIMAX_CN_BASE_URL`, `DASHSCOPE_BASE_URL`, or `XIAOMI_BASE_URL` environment variables.
|
||||
|
||||
:::note Z.AI Endpoint Auto-Detection
|
||||
When using the Z.AI / GLM provider, Hermes automatically probes multiple endpoints (global, China, coding variants) to find one that accepts your API key. You don't need to set `GLM_BASE_URL` manually — the working endpoint is detected and cached automatically.
|
||||
@@ -849,7 +854,7 @@ You can also select named custom providers from the interactive `hermes model` m
|
||||
| **Cost optimization** | ClawRouter or OpenRouter with `sort: "price"` |
|
||||
| **Maximum privacy** | Ollama, vLLM, or llama.cpp (fully local) |
|
||||
| **Enterprise / Azure** | Azure OpenAI with custom endpoint |
|
||||
| **Chinese AI models** | z.ai (GLM), Kimi/Moonshot, or MiniMax (first-class providers) |
|
||||
| **Chinese AI models** | z.ai (GLM), Kimi/Moonshot, MiniMax, or Xiaomi MiMo (first-class providers) |
|
||||
|
||||
:::tip
|
||||
You can switch between providers at any time with `hermes model` — no restart required. Your conversation history, memory, and skills carry over regardless of which provider you use.
|
||||
@@ -924,7 +929,7 @@ fallback_model:
|
||||
|
||||
When activated, the fallback swaps the model and provider mid-session without losing your conversation. It fires **at most once** per session.
|
||||
|
||||
Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `deepseek`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `alibaba`, `custom`.
|
||||
Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `deepseek`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `alibaba`, `custom`.
|
||||
|
||||
:::tip
|
||||
Fallback is configured exclusively through `config.yaml` — there are no environment variables for it. For full details on when it triggers, supported providers, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/docs/user-guide/features/fallback-providers).
|
||||
|
||||
@@ -76,7 +76,7 @@ Common options:
|
||||
| `-q`, `--query "..."` | One-shot, non-interactive prompt. |
|
||||
| `-m`, `--model <model>` | Override the model for this run. |
|
||||
| `-t`, `--toolsets <csv>` | Enable a comma-separated set of toolsets. |
|
||||
| `--provider <provider>` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `deepseek`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `alibaba`. |
|
||||
| `--provider <provider>` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `deepseek`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `alibaba`. |
|
||||
| `-s`, `--skills <name>` | Preload one or more skills for the session (can be repeated or comma-separated). |
|
||||
| `-v`, `--verbose` | Verbose output. |
|
||||
| `-Q`, `--quiet` | Programmatic mode: suppress banner/spinner/tool previews. |
|
||||
|
||||
@@ -37,6 +37,8 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
|
||||
| `MINIMAX_CN_BASE_URL` | Override MiniMax China base URL (default: `https://api.minimaxi.com/v1`) |
|
||||
| `KILOCODE_API_KEY` | Kilo Code API key ([kilo.ai](https://kilo.ai)) |
|
||||
| `KILOCODE_BASE_URL` | Override Kilo Code base URL (default: `https://api.kilo.ai/api/gateway`) |
|
||||
| `XIAOMI_API_KEY` | Xiaomi MiMo API key ([platform.xiaomimimo.com](https://platform.xiaomimimo.com)) |
|
||||
| `XIAOMI_BASE_URL` | Override Xiaomi MiMo base URL (default: `https://api.xiaomimimo.com/v1`) |
|
||||
| `HF_TOKEN` | Hugging Face token for Inference Providers ([huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)) |
|
||||
| `HF_BASE_URL` | Override Hugging Face base URL (default: `https://router.huggingface.co/v1`) |
|
||||
| `GOOGLE_API_KEY` | Google AI Studio API key ([aistudio.google.com/app/apikey](https://aistudio.google.com/app/apikey)) |
|
||||
@@ -65,7 +67,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
|
||||
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `kilocode`, `alibaba`, `deepseek`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) |
|
||||
| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `alibaba`, `deepseek`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) |
|
||||
| `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) |
|
||||
| `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL |
|
||||
| `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) |
|
||||
|
||||
@@ -480,7 +480,9 @@ Points at a custom OpenAI-compatible endpoint. Uses `OPENAI_API_KEY` for auth.
|
||||
| `nous` / `openrouter` / etc. | not set | Force that provider, use its auth |
|
||||
| any | set | Use the custom endpoint directly (provider ignored) |
|
||||
|
||||
The `summary_model` must support a context length at least as large as your main model's, since it receives the full middle section of the conversation for compression.
|
||||
:::warning Summary model context length requirement
|
||||
The `summary_model` **must** have a context window at least as large as your main agent model's. The compressor sends the full middle section of the conversation to the summary model — if that model's context window is smaller than the main model's, the summarization call will fail with a context length error. When this happens, the middle turns are **dropped without a summary**, losing conversation context silently. If you override `summary_model`, verify its context length meets or exceeds your main model's.
|
||||
:::
|
||||
|
||||
## Context Engine
|
||||
|
||||
|
||||
@@ -50,6 +50,7 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
|
||||
| OpenCode Zen | `opencode-zen` | `OPENCODE_ZEN_API_KEY` |
|
||||
| OpenCode Go | `opencode-go` | `OPENCODE_GO_API_KEY` |
|
||||
| Kilo Code | `kilocode` | `KILOCODE_API_KEY` |
|
||||
| Xiaomi MiMo | `xiaomi` | `XIAOMI_API_KEY` |
|
||||
| Alibaba / DashScope | `alibaba` | `DASHSCOPE_API_KEY` |
|
||||
| Hugging Face | `huggingface` | `HF_TOKEN` |
|
||||
| Custom endpoint | `custom` | `base_url` + `api_key_env` (see below) |
|
||||
@@ -169,7 +170,7 @@ When a task's provider is set to `"auto"` (the default), Hermes tries providers
|
||||
|
||||
```text
|
||||
OpenRouter → Nous Portal → Custom endpoint → Codex OAuth →
|
||||
API-key providers (z.ai, Kimi, MiniMax, Hugging Face, Anthropic) → give up
|
||||
API-key providers (z.ai, Kimi, MiniMax, Xiaomi MiMo, Hugging Face, Anthropic) → give up
|
||||
```
|
||||
|
||||
**For vision tasks:**
|
||||
|
||||
@@ -212,7 +212,24 @@ When users click buttons or interact with interactive cards sent by the bot, the
|
||||
|
||||
Card action events are dispatched with `MessageType.COMMAND`, so they flow through the normal command processing pipeline.
|
||||
|
||||
To use this feature, enable the **Interactive Card** event in your Feishu app's event subscriptions (`card.action.trigger`).
|
||||
This is also how **command approval** works — when the agent needs to run a dangerous command, it sends an interactive card with Allow Once / Session / Always / Deny buttons. The user clicks a button, and the card action callback delivers the approval decision back to the agent.
|
||||
|
||||
### Required Feishu App Configuration
|
||||
|
||||
Interactive cards require **three** configuration steps in the Feishu Developer Console. Missing any of them causes error **200340** when users click card buttons.
|
||||
|
||||
1. **Subscribe to the card action event:**
|
||||
In **Event Subscriptions**, add `card.action.trigger` to your subscribed events.
|
||||
|
||||
2. **Enable the Interactive Card capability:**
|
||||
In **App Features > Bot**, ensure the **Interactive Card** toggle is enabled. This tells Feishu that your app can receive card action callbacks.
|
||||
|
||||
3. **Configure the Card Request URL (webhook mode only):**
|
||||
In **App Features > Bot > Message Card Request URL**, set the URL to the same endpoint as your event webhook (e.g. `https://your-server:8765/feishu/webhook`). In WebSocket mode this is handled automatically by the SDK.
|
||||
|
||||
:::warning
|
||||
Without all three steps, Feishu will successfully *send* interactive cards (sending only requires `im:message:send` permission), but clicking any button will return error 200340. The card appears to work — the error only surfaces when a user interacts with it.
|
||||
:::
|
||||
|
||||
## Media Support
|
||||
|
||||
@@ -412,6 +429,7 @@ WebSocket and per-group ACL settings are configured via `config.yaml` under `pla
|
||||
| Post messages show as plain text | The Feishu API rejected the post payload; this is normal fallback behavior. Check logs for details. |
|
||||
| Images/files not received by bot | Grant `im:message` and `im:resource` permission scopes to your Feishu app |
|
||||
| Bot identity not auto-detected | Grant `admin:app.info:readonly` scope, or set `FEISHU_BOT_OPEN_ID` / `FEISHU_BOT_NAME` manually |
|
||||
| Error 200340 when clicking approval buttons | Enable **Interactive Card** capability and configure **Card Request URL** in the Feishu Developer Console. See [Required Feishu App Configuration](#required-feishu-app-configuration) above. |
|
||||
| `Webhook rate limit exceeded` | More than 120 requests/minute from the same IP. This is usually a misconfiguration or loop. |
|
||||
|
||||
## Toolset
|
||||
|
||||
Reference in New Issue
Block a user