From 772cfb6c4ec7770759b4e0c8552b934fe9ff897d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 14 Apr 2026 22:38:17 -0700 Subject: [PATCH] fix: stale agent timeout, uv venv detection, empty response after tools, compression model fallback (#9051, #8620, #9400) (#10093) Four independent fixes: 1. Reset activity timestamp on cached agent reuse (#9051) When the gateway reuses a cached AIAgent for a new turn, the _last_activity_ts from the previous turn (possibly hours ago) carried over. The inactivity timeout handler immediately saw the agent as idle for hours and killed it. Fix: reset _last_activity_ts, _last_activity_desc, and _api_call_count when retrieving an agent from the cache. 2. Detect uv-managed virtual environments (#8620 sub-issue 1) The systemd unit generator fell back to sys.executable (uv's standalone Python) when running under 'uv run', because sys.prefix == sys.base_prefix. The generated ExecStart pointed to a Python binary without site-packages. Fix: check VIRTUAL_ENV env var before falling back to sys.executable. uv sets VIRTUAL_ENV even when sys.prefix doesn't reflect the venv. 3. Nudge model to continue after empty post-tool response (#9400) Weaker models sometimes return empty after tool calls. The agent silently abandoned the remaining work. Fix: append assistant('(empty)') + user nudge message and retry once. Resets after each successful tool round. 4. Compression model fallback on permanent errors (#8620 sub-issue 4) When the default summary model (gemini-3-flash) returns 503 'model_not_found' on custom proxies, the compressor entered a 600s cooldown, leaving context growing unbounded. Fix: detect permanent model-not-found errors (503, 404, 'model_not_found', 'no available channel') and fall back to using the main model for compression instead of entering cooldown. One-time fallback with immediate retry. Test plan: 40 compressor tests + 97 gateway/CLI tests + 9 venv tests pass --- agent/context_compressor.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 74bbbd5d0c8..ac5db77625e 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -693,6 +693,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio # Store for iterative updates on next compaction self._previous_summary = summary self._summary_failure_cooldown_until = 0.0 + self._summary_model_fallen_back = False return self._with_summary_prefix(summary) except RuntimeError: # No provider configured — long cooldown, unlikely to self-resolve @@ -703,6 +704,34 @@ The user has requested that this compaction PRIORITISE preserving all informatio _SUMMARY_FAILURE_COOLDOWN_SECONDS) return None except Exception as e: + # If the summary model is different from the main model and the + # error looks permanent (model not found, 503, 404), fall back to + # using the main model instead of entering cooldown that leaves + # context growing unbounded. (#8620 sub-issue 4) + _status = getattr(e, "status_code", None) or getattr(getattr(e, "response", None), "status_code", None) + _err_str = str(e).lower() + _is_model_not_found = ( + _status in (404, 503) + or "model_not_found" in _err_str + or "does not exist" in _err_str + or "no available channel" in _err_str + ) + if ( + _is_model_not_found + and self.summary_model + and self.summary_model != self.model + and not getattr(self, "_summary_model_fallen_back", False) + ): + self._summary_model_fallen_back = True + logging.warning( + "Summary model '%s' not available (%s). " + "Falling back to main model '%s' for compression.", + self.summary_model, e, self.model, + ) + self.summary_model = "" # empty = use main model + self._summary_failure_cooldown_until = 0.0 # no cooldown + return self._generate_summary(messages, summary_budget) # retry immediately + # Transient errors (timeout, rate limit, network) — shorter cooldown _transient_cooldown = 60 self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown