diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 9ed6c4a2bc..be63719e24 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -34,17 +34,29 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
})
+_OLLAMA_TAG_PATTERN = re.compile(
+ r"^(\d+\.?\d*b|latest|stable|q\d|fp?\d|instruct|chat|coder|vision|text)",
+ re.IGNORECASE,
+)
+
+
def _strip_provider_prefix(model: str) -> str:
"""Strip a recognised provider prefix from a model string.
``"local:my-model"`` → ``"my-model"``
``"qwen3.5:27b"`` → ``"qwen3.5:27b"`` (unchanged — not a provider prefix)
+ ``"qwen:0.5b"`` → ``"qwen:0.5b"`` (unchanged — Ollama model:tag)
+    ``"deepseek:latest"`` → ``"deepseek:latest"`` (unchanged — Ollama model:tag)
"""
if ":" not in model or model.startswith("http"):
return model
- prefix = model.split(":", 1)[0].strip().lower()
- if prefix in _PROVIDER_PREFIXES:
- return model.split(":", 1)[1]
+ prefix, suffix = model.split(":", 1)
+ prefix_lower = prefix.strip().lower()
+ if prefix_lower in _PROVIDER_PREFIXES:
+ # Don't strip if suffix looks like an Ollama tag (e.g. "7b", "latest", "q4_0")
+ if _OLLAMA_TAG_PATTERN.match(suffix.strip()):
+ return model
+ return suffix
return model
_model_metadata_cache: Dict[str, Dict[str, Any]] = {}
@@ -800,7 +812,7 @@ def get_model_context_length(
ctx = _resolve_nous_context_length(model)
if ctx:
return ctx
- elif provider:
+ if provider:
from agent.models_dev import lookup_models_dev_context
ctx = lookup_models_dev_context(provider, model)
if ctx:
@@ -812,10 +824,13 @@ def get_model_context_length(
return metadata[model].get("context_length", 128000)
# 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
+ # Only check `default_model in model` (is the key a substring of the input).
+ # The reverse (`model in default_model`) causes shorter names like
+ # "claude-sonnet-4" to incorrectly match "claude-sonnet-4-6" and return 1M.
for default_model, length in sorted(
DEFAULT_CONTEXT_LENGTHS.items(), key=lambda x: len(x[0]), reverse=True
):
- if default_model in model or model in default_model:
+ if default_model in model:
return length
# 9. Query local server as last resort
diff --git a/agent/models_dev.py b/agent/models_dev.py
index b564db8efa..0ef2b62cde 100644
--- a/agent/models_dev.py
+++ b/agent/models_dev.py
@@ -107,11 +107,12 @@ def fetch_models_dev(force_refresh: bool = False) -> Dict[str, Any]:
except Exception as e:
logger.debug("Failed to fetch models.dev: %s", e)
- # Fall back to disk cache
+ # Fall back to disk cache — use a short TTL (5 min) so we retry
+ # the network fetch soon instead of serving stale data for a full hour.
if not _models_dev_cache:
_models_dev_cache = _load_disk_cache()
if _models_dev_cache:
- _models_dev_cache_time = time.time()
+ _models_dev_cache_time = time.time() - _MODELS_DEV_CACHE_TTL + 300
logger.debug("Loaded models.dev from disk cache (%d providers)", len(_models_dev_cache))
return _models_dev_cache
diff --git a/run_agent.py b/run_agent.py
index 78948a7827..1c3b25fe22 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1142,23 +1142,24 @@ class AIAgent:
def _has_content_after_think_block(self, content: str) -> bool:
"""
-        Check if content has actual text after any <think> blocks.
-
+ Check if content has actual text after any reasoning/thinking blocks.
+
This detects cases where the model only outputs reasoning but no actual
response, which indicates an incomplete generation that should be retried.
-
+ Must stay in sync with _strip_think_blocks() tag variants.
+
Args:
content: The assistant message content to check
-
+
Returns:
True if there's meaningful content after think blocks, False otherwise
"""
if not content:
return False
-
-        # Remove all <think>...</think> blocks (including nested ones, non-greedy)
-        cleaned = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
-
+
+ # Remove all reasoning tag variants (must match _strip_think_blocks)
+ cleaned = self._strip_think_blocks(content)
+
# Check if there's any non-whitespace content remaining
return bool(cleaned.strip())
diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py
index 9a148305a2..36c6dad984 100644
--- a/tools/delegate_tool.py
+++ b/tools/delegate_tool.py
@@ -470,22 +470,25 @@ def delegate_task(
_parent_tool_names = list(_model_tools._last_resolved_tool_names)
# Build all child agents on the main thread (thread-safe construction)
+ # Wrapped in try/finally so the global is always restored even if a
+ # child build raises (otherwise _last_resolved_tool_names stays corrupted).
children = []
- for i, t in enumerate(task_list):
- child = _build_child_agent(
- task_index=i, goal=t["goal"], context=t.get("context"),
- toolsets=t.get("toolsets") or toolsets, model=creds["model"],
- max_iterations=effective_max_iter, parent_agent=parent_agent,
- override_provider=creds["provider"], override_base_url=creds["base_url"],
- override_api_key=creds["api_key"],
- override_api_mode=creds["api_mode"],
- )
- # Override with correct parent tool names (before child construction mutated global)
- child._delegate_saved_tool_names = _parent_tool_names
- children.append((i, t, child))
-
- # Authoritative restore: reset global to parent's tool names after all children built
- _model_tools._last_resolved_tool_names = _parent_tool_names
+ try:
+ for i, t in enumerate(task_list):
+ child = _build_child_agent(
+ task_index=i, goal=t["goal"], context=t.get("context"),
+ toolsets=t.get("toolsets") or toolsets, model=creds["model"],
+ max_iterations=effective_max_iter, parent_agent=parent_agent,
+ override_provider=creds["provider"], override_base_url=creds["base_url"],
+ override_api_key=creds["api_key"],
+ override_api_mode=creds["api_mode"],
+ )
+ # Override with correct parent tool names (before child construction mutated global)
+ child._delegate_saved_tool_names = _parent_tool_names
+ children.append((i, t, child))
+ finally:
+ # Authoritative restore: reset global to parent's tool names after all children built
+ _model_tools._last_resolved_tool_names = _parent_tool_names
if n_tasks == 1:
# Single task -- run directly (no thread pool overhead)