fix(gateway): make reasoning session-scoped by default

This commit is contained in:
Iris Jin
2026-04-25 02:39:01 -04:00
committed by Teknium
parent 489bed6f96
commit 25ba6a4a74
4 changed files with 269 additions and 23 deletions

View File

@@ -638,6 +638,7 @@ class GatewayRunner:
_restart_via_service: bool = False
_stop_task: Optional[asyncio.Task] = None
_session_model_overrides: Dict[str, Dict[str, str]] = {}
_session_reasoning_overrides: Dict[str, Dict[str, Any]] = {}
def __init__(self, config: Optional[GatewayConfig] = None):
self.config = config or load_gateway_config()
@@ -701,6 +702,9 @@ class GatewayRunner:
# Per-session model overrides from /model command.
# Key: session_key, Value: dict with model/provider/api_key/base_url/api_mode
self._session_model_overrides: Dict[str, Dict[str, str]] = {}
# Per-session reasoning effort overrides from /reasoning.
# Key: session_key, Value: parsed reasoning config dict.
self._session_reasoning_overrides: Dict[str, Dict[str, Any]] = {}
# Track pending exec approvals per session
# Key: session_key, Value: {"command": str, "pattern_key": str, ...}
self._pending_approvals: Dict[str, Dict[str, Any]] = {}
@@ -1263,6 +1267,66 @@ class GatewayRunner:
logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
return result
@staticmethod
def _parse_reasoning_command_args(raw_args: str) -> tuple[str, bool]:
"""Parse `/reasoning` args into `(value, persist_global)`.
`/reasoning <level>` is session-scoped by default. `--global` may be
supplied in any position to persist the change to config.yaml.
"""
import shlex
text = str(raw_args or "").strip().replace("", "--")
if not text:
return "", False
try:
tokens = shlex.split(text)
except ValueError:
tokens = text.split()
persist_global = False
value_tokens = []
for token in tokens:
if token == "--global":
persist_global = True
else:
value_tokens.append(token)
return " ".join(value_tokens).strip().lower(), persist_global
def _resolve_session_reasoning_config(
self,
*,
source: Optional[SessionSource] = None,
session_key: Optional[str] = None,
) -> dict | None:
"""Resolve reasoning effort for a session, honoring session overrides."""
resolved_session_key = session_key
if not resolved_session_key and source is not None:
try:
resolved_session_key = self._session_key_for_source(source)
except Exception:
resolved_session_key = None
overrides = getattr(self, "_session_reasoning_overrides", {}) or {}
if resolved_session_key and resolved_session_key in overrides:
return overrides[resolved_session_key]
return self._load_reasoning_config()
def _set_session_reasoning_override(
self,
session_key: str,
reasoning_config: Optional[dict],
) -> None:
"""Set or clear the session-scoped reasoning override."""
if not session_key:
return
if not hasattr(self, "_session_reasoning_overrides"):
self._session_reasoning_overrides = {}
if reasoning_config is None:
self._session_reasoning_overrides.pop(session_key, None)
else:
self._session_reasoning_overrides[session_key] = dict(reasoning_config)
@staticmethod
def _load_service_tier() -> str | None:
"""Load Priority Processing setting from config.yaml.
@@ -3982,6 +4046,8 @@ class GatewayRunner:
# Get or create session
session_entry = self.session_store.get_or_create_session(source)
session_key = session_entry.session_key
if getattr(session_entry, "was_auto_reset", False):
self._set_session_reasoning_override(session_key, None)
# Emit session:start for new or auto-reset sessions
_is_new_session = (
@@ -4652,6 +4718,7 @@ class GatewayRunner:
self.session_store.reset_session(session_key)
self._evict_cached_agent(session_key)
self._session_model_overrides.pop(session_key, None)
self._set_session_reasoning_override(session_key, None)
response = (response or "") + (
"\n\n🔄 Session auto-reset — the conversation exceeded the "
"maximum context size and could not be compressed further. "
@@ -4928,9 +4995,10 @@ class GatewayRunner:
# Reset the session
new_entry = self.session_store.reset_session(session_key)
# Clear any session-scoped model override so the next agent picks up
# the configured default instead of the previously switched model.
# Clear any session-scoped model/reasoning overrides so the next agent
# picks up configured defaults instead of previous session switches.
self._session_model_overrides.pop(session_key, None)
self._set_session_reasoning_override(session_key, None)
# Clear session-scoped dangerous-command approvals and /yolo state.
# /new is a conversation-boundary operation — approval state from the
@@ -6417,7 +6485,7 @@ class GatewayRunner:
pr = self._provider_routing
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
reasoning_config = self._load_reasoning_config()
reasoning_config = self._resolve_session_reasoning_config(source=source)
self._reasoning_config = reasoning_config
self._service_tier = self._load_service_tier()
turn_route = self._resolve_turn_agent_config(prompt, model, runtime_kwargs)
@@ -6590,7 +6658,10 @@ class GatewayRunner:
return
platform_key = _platform_config_key(source.platform)
reasoning_config = self._load_reasoning_config()
reasoning_config = self._resolve_session_reasoning_config(
source=source,
session_key=session_key,
)
self._service_tier = self._load_service_tier()
turn_route = self._resolve_turn_agent_config(question, model, runtime_kwargs)
pr = self._provider_routing
@@ -6696,17 +6767,24 @@ class GatewayRunner:
"""Handle /reasoning command — manage reasoning effort and display toggle.
Usage:
/reasoning Show current effort level and display state
/reasoning <level> Set reasoning effort (none, minimal, low, medium, high, xhigh)
/reasoning show|on Show model reasoning in responses
/reasoning hide|off Hide model reasoning from responses
/reasoning Show current effort level and display state
/reasoning <level> Set reasoning effort for this session only
/reasoning <level> --global Persist reasoning effort to config.yaml
/reasoning reset Clear this session's reasoning override
/reasoning show|on Show model reasoning in responses
/reasoning hide|off Hide model reasoning from responses
"""
import yaml
args = event.get_command_args().strip().lower()
raw_args = event.get_command_args().strip()
args, persist_global = self._parse_reasoning_command_args(raw_args)
config_path = _hermes_home / "config.yaml"
self._reasoning_config = self._load_reasoning_config()
session_key = self._session_key_for_source(event.source)
self._show_reasoning = self._load_show_reasoning()
self._reasoning_config = self._resolve_session_reasoning_config(
source=event.source,
session_key=session_key,
)
def _save_config_key(key_path: str, value):
"""Save a dot-separated key to config.yaml."""
@@ -6728,7 +6806,7 @@ class GatewayRunner:
logger.error("Failed to save config key %s: %s", key_path, e)
return False
if not args:
if not raw_args:
# Show current state
rc = self._reasoning_config
if rc is None:
@@ -6738,11 +6816,14 @@ class GatewayRunner:
else:
level = rc.get("effort", "medium")
display_state = "on ✓" if self._show_reasoning else "off"
has_session_override = session_key in (getattr(self, "_session_reasoning_overrides", {}) or {})
scope = "session override" if has_session_override else "global config"
return (
"🧠 **Reasoning Settings**\n\n"
f"**Effort:** `{level}`\n"
f"**Scope:** {scope}\n"
f"**Display:** {display_state}\n\n"
"_Usage:_ `/reasoning <none|minimal|low|medium|high|xhigh|show|hide>`"
"_Usage:_ `/reasoning <none|minimal|low|medium|high|xhigh|reset|show|hide> [--global]`"
)
# Display toggle (per-platform)
@@ -6762,22 +6843,38 @@ class GatewayRunner:
# Effort level change
effort = args.strip()
if effort == "reset":
if persist_global:
return "⚠️ `/reasoning reset --global` is not supported. Use `/reasoning <level> --global` to change the global default."
self._set_session_reasoning_override(session_key, None)
self._reasoning_config = self._load_reasoning_config()
self._evict_cached_agent(session_key)
return "🧠 ✓ Session reasoning override cleared; falling back to global config."
if effort == "none":
parsed = {"enabled": False}
elif effort in ("minimal", "low", "medium", "high", "xhigh"):
parsed = {"enabled": True, "effort": effort}
else:
return (
f"⚠️ Unknown argument: `{effort}`\n\n"
f"⚠️ Unknown argument: `{effort or raw_args.lower()}`\n\n"
"**Valid levels:** none, minimal, low, medium, high, xhigh\n"
"**Display:** show, hide"
"**Display:** show, hide\n"
"**Persist:** add `--global` to save beyond this session"
)
self._reasoning_config = parsed
if _save_config_key("agent.reasoning_effort", effort):
return f"🧠 ✓ Reasoning effort set to `{effort}` (saved to config)\n_(takes effect on next message)_"
else:
return f"🧠 ✓ Reasoning effort set to `{effort}` (this session only)"
if persist_global:
if _save_config_key("agent.reasoning_effort", effort):
self._set_session_reasoning_override(session_key, None)
self._evict_cached_agent(session_key)
return f"🧠 ✓ Reasoning effort set to `{effort}` (saved to config)\n_(takes effect on next message)_"
self._set_session_reasoning_override(session_key, parsed)
self._evict_cached_agent(session_key)
return f"🧠 ✓ Reasoning effort set to `{effort}` (session only — config save failed)\n_(takes effect on next message)_"
self._set_session_reasoning_override(session_key, parsed)
self._evict_cached_agent(session_key)
return f"🧠 ✓ Reasoning effort set to `{effort}` (session only — add `--global` to persist)\n_(takes effect on next message)_"
async def _handle_fast_command(self, event: MessageEvent) -> str:
"""Handle /fast — mirror the CLI Priority Processing toggle in gateway chats."""
@@ -9579,7 +9676,10 @@ class GatewayRunner:
}
pr = self._provider_routing
reasoning_config = self._load_reasoning_config()
reasoning_config = self._resolve_session_reasoning_config(
source=source,
session_key=session_key,
)
self._reasoning_config = reasoning_config
self._service_tier = self._load_service_tier()
# Set up stream consumer for token streaming or interim commentary.

View File

@@ -33,6 +33,7 @@ def _make_runner():
runner._ephemeral_system_prompt = ""
runner._prefill_messages = []
runner._reasoning_config = None
runner._session_reasoning_overrides = {}
runner._show_reasoning = False
runner._provider_routing = {}
runner._fallback_model = None
@@ -76,6 +77,10 @@ class TestReasoningCommand:
source = inspect.getsource(gateway_run.GatewayRunner._handle_message)
assert '"reasoning"' in source
def test_parse_reasoning_command_args_accepts_ascii_and_smart_global_flags(self):
assert gateway_run.GatewayRunner._parse_reasoning_command_args("high --global") == ("high", True)
assert gateway_run.GatewayRunner._parse_reasoning_command_args("—global xhigh") == ("xhigh", True)
@pytest.mark.asyncio
async def test_reasoning_command_reloads_current_state_from_config(self, tmp_path, monkeypatch):
hermes_home = tmp_path / "hermes"
@@ -111,13 +116,90 @@ class TestReasoningCommand:
runner = _make_runner()
runner._reasoning_config = {"enabled": True, "effort": "medium"}
result = await runner._handle_reasoning_command(_make_event("/reasoning low"))
result = await runner._handle_reasoning_command(_make_event("/reasoning low --global"))
saved = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert saved["agent"]["reasoning_effort"] == "low"
assert runner._reasoning_config == {"enabled": True, "effort": "low"}
assert "takes effect on next message" in result
@pytest.mark.asyncio
async def test_handle_reasoning_command_defaults_to_session_only(self, tmp_path, monkeypatch):
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
config_path = hermes_home / "config.yaml"
config_path.write_text("agent:\n reasoning_effort: medium\n", encoding="utf-8")
monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home)
runner = _make_runner()
event = _make_event("/reasoning high")
session_key = runner._session_key_for_source(event.source)
result = await runner._handle_reasoning_command(event)
saved = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert saved["agent"]["reasoning_effort"] == "medium"
assert runner._session_reasoning_overrides[session_key] == {"enabled": True, "effort": "high"}
assert runner._reasoning_config == {"enabled": True, "effort": "high"}
assert "session only" in result
@pytest.mark.asyncio
async def test_reasoning_global_clears_existing_session_override(self, tmp_path, monkeypatch):
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
config_path = hermes_home / "config.yaml"
config_path.write_text("agent:\n reasoning_effort: medium\n", encoding="utf-8")
monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home)
runner = _make_runner()
event = _make_event("/reasoning low --global")
session_key = runner._session_key_for_source(event.source)
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "xhigh"}
result = await runner._handle_reasoning_command(event)
saved = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert saved["agent"]["reasoning_effort"] == "low"
assert session_key not in runner._session_reasoning_overrides
assert "saved to config" in result
@pytest.mark.asyncio
async def test_reasoning_reset_clears_session_override_without_config_write(self, tmp_path, monkeypatch):
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
config_path = hermes_home / "config.yaml"
config_path.write_text("agent:\n reasoning_effort: medium\n", encoding="utf-8")
monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home)
runner = _make_runner()
event = _make_event("/reasoning reset")
session_key = runner._session_key_for_source(event.source)
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "xhigh"}
result = await runner._handle_reasoning_command(event)
saved = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert saved["agent"]["reasoning_effort"] == "medium"
assert session_key not in runner._session_reasoning_overrides
assert "cleared" in result
def test_resolve_session_reasoning_prefers_session_override(self, tmp_path, monkeypatch):
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
(hermes_home / "config.yaml").write_text("agent:\n reasoning_effort: low\n", encoding="utf-8")
monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home)
runner = _make_runner()
source = _make_event("/reasoning").source
session_key = runner._session_key_for_source(source)
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "xhigh"}
assert runner._resolve_session_reasoning_config(source=source) == {"enabled": True, "effort": "xhigh"}
def test_run_agent_reloads_reasoning_config_per_message(self, tmp_path, monkeypatch):
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
@@ -167,6 +249,56 @@ class TestReasoningCommand:
assert _CapturingAgent.last_init is not None
assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "low"}
def test_run_agent_prefers_session_reasoning_override(self, tmp_path, monkeypatch):
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
(hermes_home / "config.yaml").write_text("agent:\n reasoning_effort: low\n", encoding="utf-8")
monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home)
monkeypatch.setattr(gateway_run, "_env_path", hermes_home / ".env")
monkeypatch.setattr(gateway_run, "load_dotenv", lambda *args, **kwargs: None)
monkeypatch.setattr(
gateway_run,
"_resolve_runtime_agent_kwargs",
lambda: {
"provider": "openrouter",
"api_mode": "chat_completions",
"base_url": "https://openrouter.ai/api/v1",
"api_key": "***",
},
)
fake_run_agent = types.ModuleType("run_agent")
fake_run_agent.AIAgent = _CapturingAgent
monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
_CapturingAgent.last_init = None
runner = _make_runner()
session_key = "agent:main:local:dm"
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
source = SessionSource(
platform=Platform.LOCAL,
chat_id="cli",
chat_name="CLI",
chat_type="dm",
user_id="user-1",
)
result = asyncio.run(
runner._run_agent(
message="ping",
context_prompt="",
history=[],
source=source,
session_id="session-1",
session_key=session_key,
)
)
assert result["final_response"] == "ok"
assert _CapturingAgent.last_init is not None
assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "high"}
def test_run_agent_includes_enabled_mcp_servers_in_gateway_toolsets(self, tmp_path, monkeypatch):
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()

View File

@@ -54,6 +54,7 @@ def _make_runner():
runner._background_tasks = set()
runner._session_db = None
runner._session_model_overrides = {}
runner._session_reasoning_overrides = {}
runner._pending_model_notes = {}
runner._pending_approvals = {}
runner._agent_cache = {}
@@ -102,6 +103,7 @@ def test_run_agent_prefers_session_override_over_global_runtime(monkeypatch):
)
session_key = "agent:main:local:dm"
runner._session_model_overrides[session_key] = _codex_override()
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
result = asyncio.run(
runner._run_agent(
@@ -121,6 +123,7 @@ def test_run_agent_prefers_session_override_over_global_runtime(monkeypatch):
assert _CapturingAgent.last_init["api_mode"] == "codex_responses"
assert _CapturingAgent.last_init["base_url"] == "https://chatgpt.com/backend-api/codex"
assert _CapturingAgent.last_init["api_key"] == "***"
assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "high"}
@pytest.mark.asyncio
@@ -149,6 +152,7 @@ async def test_background_task_prefers_session_override_over_global_runtime(monk
)
session_key = runner._session_key_for_source(source)
runner._session_model_overrides[session_key] = _codex_override()
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
await runner._run_background_task("say hello", source, "bg_test")
@@ -158,3 +162,4 @@ async def test_background_task_prefers_session_override_over_global_runtime(monk
assert _CapturingAgent.last_init["api_mode"] == "codex_responses"
assert _CapturingAgent.last_init["base_url"] == "https://chatgpt.com/backend-api/codex"
assert _CapturingAgent.last_init["api_key"] == "***"
assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "high"}

View File

@@ -1,4 +1,4 @@
"""Tests that /new (and its /reset alias) clears the session-scoped model override."""
"""Tests that /new (and its /reset alias) clears session-scoped overrides."""
from datetime import datetime
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock
@@ -37,6 +37,7 @@ def _make_runner():
runner._voice_mode = {}
runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
runner._session_model_overrides = {}
runner._session_reasoning_overrides = {}
runner._pending_model_notes = {}
runner._background_tasks = set()
@@ -75,14 +76,16 @@ async def test_new_command_clears_session_model_override():
runner._session_model_overrides[session_key] = {
"model": "gpt-4o",
"provider": "openai",
"api_key": "sk-test",
"api_key": "***",
"base_url": "",
"api_mode": "openai",
}
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
await runner._handle_reset_command(_make_event("/new"))
assert session_key not in runner._session_model_overrides
assert session_key not in runner._session_reasoning_overrides
@pytest.mark.asyncio
@@ -92,10 +95,12 @@ async def test_new_command_no_override_is_noop():
session_key = build_session_key(_make_source())
assert session_key not in runner._session_model_overrides
assert session_key not in runner._session_reasoning_overrides
await runner._handle_reset_command(_make_event("/new"))
assert session_key not in runner._session_model_overrides
assert session_key not in runner._session_reasoning_overrides
@pytest.mark.asyncio
@@ -115,12 +120,16 @@ async def test_new_command_only_clears_own_session():
runner._session_model_overrides[other_key] = {
"model": "claude-sonnet-4-6",
"provider": "anthropic",
"api_key": "sk-ant-test",
"api_key": "***",
"base_url": "",
"api_mode": "anthropic",
}
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
runner._session_reasoning_overrides[other_key] = {"enabled": True, "effort": "low"}
await runner._handle_reset_command(_make_event("/new"))
assert session_key not in runner._session_model_overrides
assert other_key in runner._session_model_overrides
assert session_key not in runner._session_reasoning_overrides
assert other_key in runner._session_reasoning_overrides