fix: use consistent fallback for pickle_key derivation

Address review: _pickle_key now uses _acct_id (which has the 'hermes' fallback) instead of raw self._user_id, so both values stay consistent when user_id is empty.
fix(matrix): pass required args to MemoryCryptoStore for mautrix ≥0.21
2026-06-30 15:25:48 +08:00 · 2026-04-11 17:26:17 +00:00 · 2026-04-11 17:03:38 +00:00 · 2026-04-11 03:29:31 -07:00 · 2026-04-11 03:13:23 -07:00
6 changed files with 58 additions and 8 deletions
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -113,7 +113,10 @@ DEFAULT_CONTEXT_LENGTHS = {
    "deepseek": 128000,
    # Meta
    "llama": 131072,
-    # Qwen
+    # Qwen — specific model families before the catch-all.
+    # Official docs: https://help.aliyun.com/zh/model-studio/developer-reference/
+    "qwen3-coder-plus": 1000000,  # 1M context
+    "qwen3-coder": 262144,        # 256K context
    "qwen": 131072,
    # MiniMax — official docs: 204,800 context for all models
    # https://platform.minimax.io/docs/api-reference/text-anthropic-api
--- a/gateway/platforms/matrix.py
+++ b/gateway/platforms/matrix.py
@@ -352,7 +352,16 @@ class MatrixAdapter(BasePlatformAdapter):
                from mautrix.crypto import OlmMachine
                from mautrix.crypto.store import MemoryCryptoStore

-                crypto_store = MemoryCryptoStore()
+                # account_id and pickle_key are required by mautrix ≥0.21.
+                # Use the Matrix user ID (or "hermes" if empty) as a stable account_id.
+                # pickle_key secures in-memory serialisation; derive it from
+                # account_id:device_id, mirroring the pair used for the on-disk HMAC.
+                _acct_id = self._user_id or "hermes"
+                _pickle_key = f"{_acct_id}:{self._device_id}"
+                crypto_store = MemoryCryptoStore(
+                    account_id=_acct_id,
+                    pickle_key=_pickle_key,
+                )

                # Restore persisted crypto state from a previous run.
                # Uses HMAC to verify integrity before unpickling.
--- a/run_agent.py
+++ b/run_agent.py
@@ -5888,8 +5888,16 @@ class AIAgent:
            api_kwargs["tools"] = self.tools

        if self.max_tokens is not None:
-            if not self._is_qwen_portal():
-                api_kwargs.update(self._max_tokens_param(self.max_tokens))
+            api_kwargs.update(self._max_tokens_param(self.max_tokens))
+        elif self._is_qwen_portal():
+            # Qwen Portal defaults to a very low max_tokens when omitted.
+            # Reasoning models (qwen3-coder-plus) exhaust that budget on
+            # thinking tokens alone, causing the portal to return
+            # finish_reason="stop" with truncated output — the agent sees
+            # this as an intentional stop and exits the loop.  Send 65536
+            # (the documented max output for qwen3-coder models) so the
+            # model has adequate output budget for tool calls.
+            api_kwargs.update(self._max_tokens_param(65536))
        elif (self._is_openrouter_url() or "nousresearch" in self._base_url_lower) and "claude" in (self.model or "").lower():
            # OpenRouter and Nous Portal translate requests to Anthropic's
            # Messages API, which requires max_tokens as a mandatory field.
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@@ -222,6 +222,24 @@ class TestGetModelContextLength:
        mock_fetch.return_value = {}
        assert get_model_context_length("openai/gpt-4o") == 128000

+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_qwen3_coder_plus_context_length(self, mock_fetch):
+        """qwen3-coder-plus has a 1M context window, not the generic 128K Qwen default."""
+        mock_fetch.return_value = {}
+        assert get_model_context_length("qwen3-coder-plus") == 1000000
+
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_qwen3_coder_context_length(self, mock_fetch):
+        """qwen3-coder has a 256K context window, not the generic 128K Qwen default."""
+        mock_fetch.return_value = {}
+        assert get_model_context_length("qwen3-coder") == 262144
+
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_qwen_generic_context_length(self, mock_fetch):
+        """Generic qwen models still get the 128K default."""
+        mock_fetch.return_value = {}
+        assert get_model_context_length("qwen3-plus") == 131072
+
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_api_missing_context_length_key(self, mock_fetch):
        """Model in API but without context_length → defaults to 128000."""
--- a/tests/gateway/test_matrix.py
+++ b/tests/gateway/test_matrix.py
@@ -157,7 +157,9 @@ def _make_fake_mautrix():
    mautrix_crypto_store = types.ModuleType("mautrix.crypto.store")

    class MemoryCryptoStore:
-        pass
+        def __init__(self, account_id="", pickle_key=""):
+            self.account_id = account_id
+            self.pickle_key = pickle_key

    mautrix_crypto_store.MemoryCryptoStore = MemoryCryptoStore

--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -953,14 +953,24 @@ class TestBuildApiKwargs:
        assert kwargs["messages"][0]["content"][0]["text"] == "hi"
        assert "cache_control" not in kwargs["messages"][0]["content"][0]

-    def test_qwen_portal_omits_max_tokens(self, agent):
+    def test_qwen_portal_sends_explicit_max_tokens(self, agent):
+        """When the user explicitly sets max_tokens, it should be sent to Qwen Portal."""
        agent.base_url = "https://portal.qwen.ai/v1"
        agent._base_url_lower = agent.base_url.lower()
        agent.max_tokens = 4096
        messages = [{"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
-        assert "max_tokens" not in kwargs
-        assert "max_completion_tokens" not in kwargs
+        assert kwargs["max_tokens"] == 4096
+
+    def test_qwen_portal_default_max_tokens(self, agent):
+        """When max_tokens is None, Qwen Portal gets a default of 65536
+        to prevent reasoning models from exhausting their output budget."""
+        agent.base_url = "https://portal.qwen.ai/v1"
+        agent._base_url_lower = agent.base_url.lower()
+        agent.max_tokens = None
+        messages = [{"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}]
+        kwargs = agent._build_api_kwargs(messages)
+        assert kwargs["max_tokens"] == 65536


 class TestBuildAssistantMessage:
Author	SHA1	Message	Date
Hermes Agent	de7253dcdf	fix: use consistent fallback for pickle_key derivation Address review: _pickle_key now uses _acct_id (which has the 'hermes' fallback) instead of raw self._user_id, so both values stay consistent when user_id is empty.	2026-04-11 17:26:17 +00:00
Hermes Agent	050f496816	fix(matrix): pass required args to MemoryCryptoStore for mautrix ≥0.21 MemoryCryptoStore.__init__() now requires account_id and pickle_key positional arguments as of mautrix 0.21. The migration from matrix-nio (commit `1850747`) didn't account for this, causing E2EE initialization to fail with: MemoryCryptoStore.__init__() missing 2 required positional arguments: 'account_id' and 'pickle_key' Pass self._user_id as account_id and derive pickle_key from the same user_id:device_id pair already used for the on-disk HMAC signature. Update the test stub to accept the new parameters. Fixes #7803	2026-04-11 17:03:38 +00:00
kshitijk4poor	af9caec44f	fix(qwen): correct context lengths for qwen3-coder models and send max_tokens to portal Based on PR #7285 by @kshitijk4poor. Two bugs affecting Qwen OAuth users: 1. Wrong context window — qwen3-coder-plus showed 128K instead of 1M. Added specific entries before the generic qwen catch-all: - qwen3-coder-plus: 1,000,000 (corrected from PR's 1,048,576 per official Alibaba Cloud docs and OpenRouter) - qwen3-coder: 262,144 2. Random stopping — max_tokens was suppressed for Qwen Portal, so the server applied its own low default. Reasoning models exhaust that on thinking tokens. Now: honor explicit max_tokens, default to 65536 when unset. Co-authored-by: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>	2026-04-11 03:29:31 -07:00
Teknium	f459214010	feat: background process monitoring — watch_patterns for real-time output alerts * feat: add watch_patterns to background processes for output monitoring Adds a new 'watch_patterns' parameter to terminal(background=true) that lets the agent specify strings to watch for in process output. When a matching line appears, a notification is queued and injected as a synthetic message — triggering a new agent turn, similar to notify_on_complete but mid-process. Implementation: - ProcessSession gets watch_patterns field + rate-limit state - _check_watch_patterns() in ProcessRegistry scans new output chunks from all three reader threads (local, PTY, env-poller) - Rate limited: max 8 notifications per 10s window - Sustained overload (45s) permanently disables watching for that process - watch_queue alongside completion_queue, same consumption pattern - CLI drains watch_queue in both idle loop and post-turn drain - Gateway drains after agent runs via _inject_watch_notification() - Checkpoint persistence + crash recovery includes watch_patterns - Blocked in execute_code sandbox (like other bg params) - 20 new tests covering matching, rate limiting, overload kill, checkpoint persistence, schema, and handler passthrough Usage: terminal( command='npm run dev', background=true, watch_patterns=['ERROR', 'WARN', 'listening on port'] ) * refactor: merge watch_queue into completion_queue Unified queue with 'type' field distinguishing 'completion', 'watch_match', and 'watch_disabled' events. Extracted _format_process_notification() in CLI and gateway to handle all event types in a single drain loop. Removes duplication across both CLI drain sites and the gateway.	2026-04-11 03:13:23 -07:00