From 76bc27199fcd7379909c64c9be6ebac2f38bc929 Mon Sep 17 00:00:00 2001 From: Test Date: Fri, 20 Mar 2026 10:02:42 -0700 Subject: [PATCH 1/3] fix(cli, agent): improve streaming handling and state management - Updated _stream_delta method in HermesCLI to handle None values, flushing the stream and resetting state for clean tool execution. - Enhanced quiet mode handling in AIAgent to ensure proper display closure before tool execution, preventing display issues with intermediate streamed content. These changes improve the robustness of the streaming functionality and ensure a smoother user experience during tool interactions. --- cli.py | 10 +++++++++- run_agent.py | 16 ++++++++++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/cli.py b/cli.py index af8ac4efc2..e831dab358 100755 --- a/cli.py +++ b/cli.py @@ -1504,7 +1504,7 @@ class HermesCLI: _cprint(f"{_DIM}└{'─' * (w - 2)}┘{_RST}") self._reasoning_box_opened = False - def _stream_delta(self, text: str) -> None: + def _stream_delta(self, text) -> None: """Line-buffered streaming callback for real-time token rendering. Receives text deltas from the agent as tokens arrive. Buffers @@ -1514,7 +1514,15 @@ class HermesCLI: Reasoning/thinking blocks (, , etc.) are suppressed during streaming since they'd display raw XML tags. The agent strips them from the final response anyway. + + A ``None`` value signals an intermediate turn boundary (tools are + about to execute). Flushes any open boxes and resets state so + tool feed lines render cleanly between turns. """ + if text is None: + self._flush_stream() + self._reset_stream_state() + return if not text: return diff --git a/run_agent.py b/run_agent.py index 1c3b25fe22..0e444b1adf 100644 --- a/run_agent.py +++ b/run_agent.py @@ -4838,7 +4838,7 @@ class AIAgent: spinner.stop(cute_msg) elif self.quiet_mode: self._vprint(f" {cute_msg}") - elif self.quiet_mode and not self._has_stream_consumers(): + elif self.quiet_mode: face = random.choice(KawaiiSpinner.KAWAII_WAITING) emoji = _get_tool_emoji(function_name) preview = _build_tool_preview(function_name, function_args) or function_name @@ -6568,7 +6568,19 @@ class AIAgent: self._vprint(f" ┊ 💬 {clean}") messages.append(assistant_msg) - + + # Close any open streaming display (response box, reasoning + # box) before tool execution begins. Intermediate turns may + # have streamed early content that opened the response box; + # flushing here prevents it from wrapping tool feed lines. + # Only signal the display callback — TTS (_stream_callback) + # should NOT receive None (it uses None as end-of-stream). + if self.stream_delta_callback: + try: + self.stream_delta_callback(None) + except Exception: + pass + _msg_count_before_tools = len(messages) self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count) From 900e848522091bcdc82bdc33ebd6055be04bc1e2 Mon Sep 17 00:00:00 2001 From: Test Date: Fri, 20 Mar 2026 11:57:24 -0700 Subject: [PATCH 2/3] fix: infer provider from base URL for models.dev context length lookup Custom endpoint users (DashScope/Alibaba, Z.AI, Kimi, DeepSeek, etc.) get wrong context lengths because their provider resolves as "openrouter" or "custom", skipping the models.dev lookup entirely. For example, qwen3.5-plus on DashScope falls to the generic "qwen" hardcoded default (131K) instead of the correct 1M. Add _infer_provider_from_url() that maps known API hostnames to their models.dev provider IDs. When the explicit provider is generic (openrouter/custom/empty), infer from the base URL before the models.dev lookup. This resolves context lengths correctly for DashScope, Z.AI, Kimi, MiniMax, DeepSeek, and Nous endpoints without requiring users to manually set context_length in config. Also refactors _is_known_provider_base_url() to use the same URL mapping, removing the duplicated hostname list. --- agent/model_metadata.py | 57 ++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/agent/model_metadata.py b/agent/model_metadata.py index be63719e24..bea53611ac 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -151,22 +151,41 @@ def _is_custom_endpoint(base_url: str) -> bool: return bool(normalized) and not _is_openrouter_base_url(normalized) -def _is_known_provider_base_url(base_url: str) -> bool: +_URL_TO_PROVIDER: Dict[str, str] = { + "api.openai.com": "openai", + "chatgpt.com": "openai", + "api.anthropic.com": "anthropic", + "api.z.ai": "zai", + "api.moonshot.ai": "kimi-coding", + "api.kimi.com": "kimi-coding", + "api.minimax": "minimax", + "dashscope.aliyuncs.com": "alibaba", + "openrouter.ai": "openrouter", + "inference-api.nousresearch.com": "nous", + "api.deepseek.com": "deepseek", +} + + +def _infer_provider_from_url(base_url: str) -> Optional[str]: + """Infer the models.dev provider name from a base URL. + + This allows context length resolution via models.dev for custom endpoints + like DashScope (Alibaba), Z.AI, Kimi, etc. without requiring the user to + explicitly set the provider name in config. + """ normalized = _normalize_base_url(base_url) if not normalized: - return False + return None parsed = urlparse(normalized if "://" in normalized else f"https://{normalized}") host = parsed.netloc.lower() or parsed.path.lower() - known_hosts = ( - "api.openai.com", - "chatgpt.com", - "api.anthropic.com", - "api.z.ai", - "api.moonshot.ai", - "api.kimi.com", - "api.minimax", - ) - return any(known_host in host for known_host in known_hosts) + for url_part, provider in _URL_TO_PROVIDER.items(): + if url_part in host: + return provider + return None + + +def _is_known_provider_base_url(base_url: str) -> bool: + return _infer_provider_from_url(base_url) is not None def is_local_endpoint(base_url: str) -> bool: @@ -808,13 +827,21 @@ def get_model_context_length( # These are provider-specific and take priority over the generic OR cache, # since the same model can have different context limits per provider # (e.g. claude-opus-4.6 is 1M on Anthropic but 128K on GitHub Copilot). - if provider == "nous": + # If provider is generic (openrouter/custom/empty), try to infer from URL. + effective_provider = provider + if not effective_provider or effective_provider in ("openrouter", "custom"): + if base_url: + inferred = _infer_provider_from_url(base_url) + if inferred: + effective_provider = inferred + + if effective_provider == "nous": ctx = _resolve_nous_context_length(model) if ctx: return ctx - if provider: + if effective_provider: from agent.models_dev import lookup_models_dev_context - ctx = lookup_models_dev_context(provider, model) + ctx = lookup_models_dev_context(effective_provider, model) if ctx: return ctx From 59074df021028941ee68492d23e5169920b886c7 Mon Sep 17 00:00:00 2001 From: Test Date: Fri, 20 Mar 2026 12:51:39 -0700 Subject: [PATCH 3/3] fix: add dashscope-intl.aliyuncs.com to URL-to-provider mapping The official international DashScope endpoint uses dashscope-intl.aliyuncs.com (per Alibaba docs), which the substring match on dashscope.aliyuncs.com misses because of the hyphenated prefix. --- agent/model_metadata.py | 1 + 1 file changed, 1 insertion(+) diff --git a/agent/model_metadata.py b/agent/model_metadata.py index bea53611ac..e3636b6fe4 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -160,6 +160,7 @@ _URL_TO_PROVIDER: Dict[str, str] = { "api.kimi.com": "kimi-coding", "api.minimax": "minimax", "dashscope.aliyuncs.com": "alibaba", + "dashscope-intl.aliyuncs.com": "alibaba", "openrouter.ai": "openrouter", "inference-api.nousresearch.com": "nous", "api.deepseek.com": "deepseek",