diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 9ed6c4a2bc..be63719e24 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -34,17 +34,29 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
 })
 
 
+_OLLAMA_TAG_PATTERN = re.compile(
+    r"^(\d+\.?\d*b|latest|stable|q\d|fp?\d|instruct|chat|coder|vision|text)",
+    re.IGNORECASE,
+)
+
+
 def _strip_provider_prefix(model: str) -> str:
     """Strip a recognised provider prefix from a model string.
 
     ``"local:my-model"`` → ``"my-model"``
     ``"qwen3.5:27b"``    → ``"qwen3.5:27b"`` (unchanged — not a provider prefix)
+    ``"qwen:0.5b"``      → ``"qwen:0.5b"`` (unchanged — Ollama model:tag)
+    ``"deepseek:latest"``→ ``"deepseek:latest"``(unchanged — Ollama model:tag)
     """
     if ":" not in model or model.startswith("http"):
         return model
-    prefix = model.split(":", 1)[0].strip().lower()
-    if prefix in _PROVIDER_PREFIXES:
-        return model.split(":", 1)[1]
+    prefix, suffix = model.split(":", 1)
+    prefix_lower = prefix.strip().lower()
+    if prefix_lower in _PROVIDER_PREFIXES:
+        # Don't strip if suffix looks like an Ollama tag (e.g. "7b", "latest", "q4_0")
+        if _OLLAMA_TAG_PATTERN.match(suffix.strip()):
+            return model
+        return suffix
     return model
 
 _model_metadata_cache: Dict[str, Dict[str, Any]] = {}
@@ -800,7 +812,7 @@ def get_model_context_length(
         ctx = _resolve_nous_context_length(model)
         if ctx:
             return ctx
-    elif provider:
+    if provider:
         from agent.models_dev import lookup_models_dev_context
         ctx = lookup_models_dev_context(provider, model)
         if ctx:
@@ -812,10 +824,13 @@
             return metadata[model].get("context_length", 128000)
 
     # 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
+    # Only check `default_model in model` (is the key a substring of the input).
+    # The reverse (`model in default_model`) causes shorter names like
+    # "claude-sonnet-4" to incorrectly match "claude-sonnet-4-6" and return 1M.
     for default_model, length in sorted(
         DEFAULT_CONTEXT_LENGTHS.items(), key=lambda x: len(x[0]), reverse=True
     ):
-        if default_model in model or model in default_model:
+        if default_model in model:
             return length
 
     # 9. Query local server as last resort
diff --git a/agent/models_dev.py b/agent/models_dev.py
index b564db8efa..0ef2b62cde 100644
--- a/agent/models_dev.py
+++ b/agent/models_dev.py
@@ -107,11 +107,12 @@ def fetch_models_dev(force_refresh: bool = False) -> Dict[str, Any]:
     except Exception as e:
         logger.debug("Failed to fetch models.dev: %s", e)
 
-    # Fall back to disk cache
+    # Fall back to disk cache — use a short TTL (5 min) so we retry
+    # the network fetch soon instead of serving stale data for a full hour.
     if not _models_dev_cache:
         _models_dev_cache = _load_disk_cache()
         if _models_dev_cache:
-            _models_dev_cache_time = time.time()
+            _models_dev_cache_time = time.time() - _MODELS_DEV_CACHE_TTL + 300
             logger.debug("Loaded models.dev from disk cache (%d providers)", len(_models_dev_cache))
 
     return _models_dev_cache
diff --git a/run_agent.py b/run_agent.py
index 78948a7827..1c3b25fe22 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1142,23 +1142,24 @@ class AIAgent:
 
     def _has_content_after_think_block(self, content: str) -> bool:
         """
-        Check if content has actual text after any <think> blocks.
-        
+        Check if content has actual text after any reasoning/thinking blocks.
+
         This detects cases where the model only outputs reasoning but no actual
         response, which indicates an incomplete generation that should be retried.
-        
+        Must stay in sync with _strip_think_blocks() tag variants.
+
         Args:
             content: The assistant message content to check
-        
+
         Returns:
             True if there's meaningful content after think blocks, False otherwise
         """
         if not content:
             return False
-        
-        # Remove all <think>...</think> blocks (including nested ones, non-greedy)
-        cleaned = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
-        
+
+        # Remove all reasoning tag variants (must match _strip_think_blocks)
+        cleaned = self._strip_think_blocks(content)
+
         # Check if there's any non-whitespace content remaining
         return bool(cleaned.strip())
 
diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py
index 9a148305a2..36c6dad984 100644
--- a/tools/delegate_tool.py
+++ b/tools/delegate_tool.py
@@ -470,22 +470,25 @@ def delegate_task(
     _parent_tool_names = list(_model_tools._last_resolved_tool_names)
 
     # Build all child agents on the main thread (thread-safe construction)
+    # Wrapped in try/finally so the global is always restored even if a
+    # child build raises (otherwise _last_resolved_tool_names stays corrupted).
     children = []
-    for i, t in enumerate(task_list):
-        child = _build_child_agent(
-            task_index=i, goal=t["goal"], context=t.get("context"),
-            toolsets=t.get("toolsets") or toolsets, model=creds["model"],
-            max_iterations=effective_max_iter, parent_agent=parent_agent,
-            override_provider=creds["provider"], override_base_url=creds["base_url"],
-            override_api_key=creds["api_key"],
-            override_api_mode=creds["api_mode"],
-        )
-        # Override with correct parent tool names (before child construction mutated global)
-        child._delegate_saved_tool_names = _parent_tool_names
-        children.append((i, t, child))
-
-    # Authoritative restore: reset global to parent's tool names after all children built
-    _model_tools._last_resolved_tool_names = _parent_tool_names
+    try:
+        for i, t in enumerate(task_list):
+            child = _build_child_agent(
+                task_index=i, goal=t["goal"], context=t.get("context"),
+                toolsets=t.get("toolsets") or toolsets, model=creds["model"],
+                max_iterations=effective_max_iter, parent_agent=parent_agent,
+                override_provider=creds["provider"], override_base_url=creds["base_url"],
+                override_api_key=creds["api_key"],
+                override_api_mode=creds["api_mode"],
+            )
+            # Override with correct parent tool names (before child construction mutated global)
+            child._delegate_saved_tool_names = _parent_tool_names
+            children.append((i, t, child))
+    finally:
+        # Authoritative restore: reset global to parent's tool names after all children built
+        _model_tools._last_resolved_tool_names = _parent_tool_names
 
     if n_tasks == 1:
         # Single task -- run directly (no thread pool overhead)