From 5822711ae66758d580d2337bd5fac6e616eb00bc Mon Sep 17 00:00:00 2001 From: Test Date: Thu, 19 Mar 2026 17:53:05 -0700 Subject: [PATCH 1/5] =?UTF-8?q?fix:=20complete=20session=20reset=20?= =?UTF-8?q?=E2=80=94=20missing=20compressor=20counters=20+=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to PR #2101 (InB4DevOps). Adds three missing context compressor resets in reset_session_state(): - compression_count (displayed in status bar) - last_total_tokens - _context_probed (stale context-error flag) Also fixes the test_cli_new_session.py prompt_toolkit mock (missing auto_suggest stub) and adds a regression test for #2099 that verifies all token counters and compressor state are zeroed on /new. --- run_agent.py | 3 ++ tests/test_cli_new_session.py | 83 +++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/run_agent.py b/run_agent.py index ffcd809f922..cb0855f8786 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1053,6 +1053,9 @@ class AIAgent: if hasattr(self, "context_compressor") and self.context_compressor: self.context_compressor.last_prompt_tokens = 0 self.context_compressor.last_completion_tokens = 0 + self.context_compressor.last_total_tokens = 0 + self.context_compressor.compression_count = 0 + self.context_compressor._context_probed = False @staticmethod def _safe_print(*args, **kwargs): diff --git a/tests/test_cli_new_session.py b/tests/test_cli_new_session.py index 7fed48e40c1..0490aad9ce1 100644 --- a/tests/test_cli_new_session.py +++ b/tests/test_cli_new_session.py @@ -12,6 +12,17 @@ from hermes_state import SessionDB from tools.todo_tool import TodoStore +class _FakeCompressor: + """Minimal stand-in for ContextCompressor.""" + + def __init__(self): + self.last_prompt_tokens = 500 + self.last_completion_tokens = 200 + self.last_total_tokens = 700 + self.compression_count = 3 + self._context_probed = True + + class _FakeAgent: def __init__(self, session_id: str, session_start): self.session_id = session_id @@ -25,6 +36,42 @@ class _FakeAgent: self.flush_memories = MagicMock() self._invalidate_system_prompt = MagicMock() + # Token counters (non-zero to verify reset) + self.session_total_tokens = 1000 + self.session_input_tokens = 600 + self.session_output_tokens = 400 + self.session_prompt_tokens = 550 + self.session_completion_tokens = 350 + self.session_cache_read_tokens = 100 + self.session_cache_write_tokens = 50 + self.session_reasoning_tokens = 80 + self.session_api_calls = 5 + self.session_estimated_cost_usd = 0.42 + self.session_cost_status = "estimated" + self.session_cost_source = "openrouter" + self.context_compressor = _FakeCompressor() + + def reset_session_state(self): + """Mirror the real AIAgent.reset_session_state().""" + self.session_total_tokens = 0 + self.session_input_tokens = 0 + self.session_output_tokens = 0 + self.session_prompt_tokens = 0 + self.session_completion_tokens = 0 + self.session_cache_read_tokens = 0 + self.session_cache_write_tokens = 0 + self.session_reasoning_tokens = 0 + self.session_api_calls = 0 + self.session_estimated_cost_usd = 0.0 + self.session_cost_status = "unknown" + self.session_cost_source = "none" + if hasattr(self, "context_compressor") and self.context_compressor: + self.context_compressor.last_prompt_tokens = 0 + self.context_compressor.last_completion_tokens = 0 + self.context_compressor.last_total_tokens = 0 + self.context_compressor.compression_count = 0 + self.context_compressor._context_probed = False + def _make_cli(env_overrides=None, config_overrides=None, **kwargs): """Create a HermesCLI instance with minimal mocking.""" @@ -58,6 +105,7 @@ def _make_cli(env_overrides=None, config_overrides=None, **kwargs): "prompt_toolkit.key_binding": MagicMock(), "prompt_toolkit.completion": MagicMock(), "prompt_toolkit.formatted_text": MagicMock(), + "prompt_toolkit.auto_suggest": MagicMock(), } with patch.dict(sys.modules, prompt_toolkit_stubs), patch.dict( "os.environ", clean_env, clear=False @@ -137,3 +185,38 @@ def test_clear_command_starts_new_session_before_redrawing(tmp_path): cli.console.clear.assert_called_once() cli.show_banner.assert_called_once() assert cli.conversation_history == [] + + +def test_new_session_resets_token_counters(tmp_path): + """Regression test for #2099: /new must zero all token counters.""" + cli = _prepare_cli_with_active_session(tmp_path) + + # Verify counters are non-zero before reset + agent = cli.agent + assert agent.session_total_tokens > 0 + assert agent.session_api_calls > 0 + assert agent.context_compressor.compression_count > 0 + + cli.process_command("/new") + + # All agent token counters must be zero + assert agent.session_total_tokens == 0 + assert agent.session_input_tokens == 0 + assert agent.session_output_tokens == 0 + assert agent.session_prompt_tokens == 0 + assert agent.session_completion_tokens == 0 + assert agent.session_cache_read_tokens == 0 + assert agent.session_cache_write_tokens == 0 + assert agent.session_reasoning_tokens == 0 + assert agent.session_api_calls == 0 + assert agent.session_estimated_cost_usd == 0.0 + assert agent.session_cost_status == "unknown" + assert agent.session_cost_source == "none" + + # Context compressor counters must also be zero + comp = agent.context_compressor + assert comp.last_prompt_tokens == 0 + assert comp.last_completion_tokens == 0 + assert comp.last_total_tokens == 0 + assert comp.compression_count == 0 + assert comp._context_probed is False From 1055d4356a56b5c5420040279d661ff20f813107 Mon Sep 17 00:00:00 2001 From: Test Date: Thu, 19 Mar 2026 19:42:11 -0700 Subject: [PATCH 2/5] fix: skip model auto-detection for custom/local providers When the user is on a custom provider (provider=custom, localhost, or 127.0.0.1 endpoint), /model no longer tries to auto-detect a provider switch. The model name changes on the current endpoint as-is. To switch away from a custom endpoint, users must use explicit provider:model syntax (e.g. /model openai-codex:gpt-5.2-codex). A helpful tip is printed when changing models on a custom endpoint. This prevents the confusing case where someone on LM Studio types /model gpt-5.2-codex, the auto-detection tries to switch providers, fails or partially succeeds, and requests still go to the old endpoint. Also fixes the missing prompt_toolkit.auto_suggest mock stub in test_cli_init.py (same issue already fixed in test_cli_new_session.py). --- cli.py | 20 ++++++++++++++++++-- tests/test_cli_init.py | 1 + 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/cli.py b/cli.py index ccc1e0d68da..af8ac4efc24 100755 --- a/cli.py +++ b/cli.py @@ -3517,8 +3517,17 @@ class HermesCLI: # Parse provider:model syntax (e.g. "openrouter:anthropic/claude-sonnet-4.5") current_provider = self.provider or self.requested_provider or "openrouter" target_provider, new_model = parse_model_input(raw_input, current_provider) - # Auto-detect provider when no explicit provider:model syntax was used - if target_provider == current_provider: + # Auto-detect provider when no explicit provider:model syntax was used. + # Skip auto-detection for custom providers — the model name might + # coincidentally match a known provider's catalog, but the user + # intends to use it on their custom endpoint. Require explicit + # provider:model syntax (e.g. /model openai-codex:gpt-5.2-codex) + # to switch away from a custom endpoint. + _base = self.base_url or "" + is_custom = current_provider == "custom" or ( + "localhost" in _base or "127.0.0.1" in _base + ) + if target_provider == current_provider and not is_custom: from hermes_cli.models import detect_provider_for_model detected = detect_provider_for_model(new_model, current_provider) if detected: @@ -3586,6 +3595,13 @@ class HermesCLI: if message: print(f" Reason: {message}") print(" Note: Model will revert on restart. Use a verified model to save to config.") + + # Helpful hint when staying on a custom endpoint + if is_custom and not provider_changed: + endpoint = self.base_url or "custom endpoint" + print(f" Endpoint: {endpoint}") + print(f" Tip: To switch providers, use /model provider:model") + print(f" e.g. /model openai-codex:gpt-5.2-codex") else: self._show_model_and_providers() elif canonical == "provider": diff --git a/tests/test_cli_init.py b/tests/test_cli_init.py index 5ebd301ed8f..f41f81bb81d 100644 --- a/tests/test_cli_init.py +++ b/tests/test_cli_init.py @@ -42,6 +42,7 @@ def _make_cli(env_overrides=None, config_overrides=None, **kwargs): "prompt_toolkit.key_binding": MagicMock(), "prompt_toolkit.completion": MagicMock(), "prompt_toolkit.formatted_text": MagicMock(), + "prompt_toolkit.auto_suggest": MagicMock(), } with patch.dict(sys.modules, prompt_toolkit_stubs), \ patch.dict("os.environ", clean_env, clear=False): From 4ad0083118fb8789dc589066102c1cbb56152b8e Mon Sep 17 00:00:00 2001 From: Test Date: Fri, 20 Mar 2026 04:36:06 -0700 Subject: [PATCH 3/5] fix(honcho): read HONCHO_BASE_URL for local/self-hosted instances Cherry-picked from PR #2120 by @unclebumpy. - from_env() now reads HONCHO_BASE_URL and enables Honcho when base_url is set, even without an API key - from_global_config() reads baseUrl from config root with HONCHO_BASE_URL env var as fallback - get_honcho_client() guard relaxed to allow base_url without api_key for no-auth local instances - Added HONCHO_BASE_URL to OPTIONAL_ENV_VARS registry Result: Setting HONCHO_BASE_URL=http://localhost:8000 in ~/.hermes/.env now correctly routes the Honcho client to a local instance. --- hermes_cli/config.py | 5 +++ honcho_integration/client.py | 24 +++++++++---- tests/honcho_integration/test_client.py | 45 +++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 7 deletions(-) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index d2a7693ac45..086acfa2b04 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -670,6 +670,11 @@ OPTIONAL_ENV_VARS = { "password": True, "category": "tool", }, + "HONCHO_BASE_URL": { + "description": "Base URL for self-hosted Honcho instances (no API key needed)", + "prompt": "Honcho base URL (e.g. http://localhost:8000)", + "category": "tool", + }, # ── Messaging platforms ── "TELEGRAM_BOT_TOKEN": { diff --git a/honcho_integration/client.py b/honcho_integration/client.py index 759576adaf7..4411241ad35 100644 --- a/honcho_integration/client.py +++ b/honcho_integration/client.py @@ -117,11 +117,13 @@ class HonchoClientConfig: def from_env(cls, workspace_id: str = "hermes") -> HonchoClientConfig: """Create config from environment variables (fallback).""" api_key = os.environ.get("HONCHO_API_KEY") + base_url = os.environ.get("HONCHO_BASE_URL", "").strip() or None return cls( workspace_id=workspace_id, api_key=api_key, environment=os.environ.get("HONCHO_ENVIRONMENT", "production"), - enabled=bool(api_key), + base_url=base_url, + enabled=bool(api_key or base_url), ) @classmethod @@ -171,8 +173,14 @@ class HonchoClientConfig: or raw.get("environment", "production") ) - # Auto-enable when API key is present (unless explicitly disabled) - # Host-level enabled wins, then root-level, then auto-enable if key exists. + base_url = ( + raw.get("baseUrl") + or os.environ.get("HONCHO_BASE_URL", "").strip() + or None + ) + + # Auto-enable when API key or base_url is present (unless explicitly disabled) + # Host-level enabled wins, then root-level, then auto-enable if key/url exists. host_enabled = host_block.get("enabled") root_enabled = raw.get("enabled") if host_enabled is not None: @@ -180,8 +188,8 @@ class HonchoClientConfig: elif root_enabled is not None: enabled = root_enabled else: - # Not explicitly set anywhere -> auto-enable if API key exists - enabled = bool(api_key) + # Not explicitly set anywhere -> auto-enable if API key or base_url exists + enabled = bool(api_key or base_url) # write_frequency: accept int or string raw_wf = ( @@ -214,6 +222,7 @@ class HonchoClientConfig: workspace_id=workspace, api_key=api_key, environment=environment, + base_url=base_url, peer_name=host_block.get("peerName") or raw.get("peerName"), ai_peer=ai_peer, linked_hosts=linked_hosts, @@ -348,11 +357,12 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho: if config is None: config = HonchoClientConfig.from_global_config() - if not config.api_key: + if not config.api_key and not config.base_url: raise ValueError( "Honcho API key not found. " "Get your API key at https://app.honcho.dev, " - "then run 'hermes honcho setup' or set HONCHO_API_KEY." + "then run 'hermes honcho setup' or set HONCHO_API_KEY. " + "For local instances, set HONCHO_BASE_URL instead." ) try: diff --git a/tests/honcho_integration/test_client.py b/tests/honcho_integration/test_client.py index b1ae29c54b7..a9a837e6265 100644 --- a/tests/honcho_integration/test_client.py +++ b/tests/honcho_integration/test_client.py @@ -60,6 +60,21 @@ class TestFromEnv: config = HonchoClientConfig.from_env(workspace_id="custom") assert config.workspace_id == "custom" + def test_reads_base_url_from_env(self): + with patch.dict(os.environ, {"HONCHO_BASE_URL": "http://localhost:8000"}, clear=False): + config = HonchoClientConfig.from_env() + assert config.base_url == "http://localhost:8000" + assert config.enabled is True + + def test_enabled_without_api_key_when_base_url_set(self): + """base_url alone (no API key) is sufficient to enable a local instance.""" + with patch.dict(os.environ, {"HONCHO_BASE_URL": "http://localhost:8000"}, clear=False): + os.environ.pop("HONCHO_API_KEY", None) + config = HonchoClientConfig.from_env() + assert config.api_key is None + assert config.base_url == "http://localhost:8000" + assert config.enabled is True + class TestFromGlobalConfig: def test_missing_config_falls_back_to_env(self, tmp_path): @@ -188,6 +203,36 @@ class TestFromGlobalConfig: config = HonchoClientConfig.from_global_config(config_path=config_file) assert config.api_key == "env-key" + def test_base_url_env_fallback(self, tmp_path): + """HONCHO_BASE_URL env var is used when no baseUrl in config JSON.""" + config_file = tmp_path / "config.json" + config_file.write_text(json.dumps({"workspace": "local"})) + + with patch.dict(os.environ, {"HONCHO_BASE_URL": "http://localhost:8000"}, clear=False): + config = HonchoClientConfig.from_global_config(config_path=config_file) + assert config.base_url == "http://localhost:8000" + assert config.enabled is True + + def test_base_url_from_config_root(self, tmp_path): + """baseUrl in config root is read and takes precedence over env var.""" + config_file = tmp_path / "config.json" + config_file.write_text(json.dumps({"baseUrl": "http://config-host:9000"})) + + with patch.dict(os.environ, {"HONCHO_BASE_URL": "http://localhost:8000"}, clear=False): + config = HonchoClientConfig.from_global_config(config_path=config_file) + assert config.base_url == "http://config-host:9000" + + def test_base_url_not_read_from_host_block(self, tmp_path): + """baseUrl is a root-level connection setting, not overridable per-host (consistent with apiKey).""" + config_file = tmp_path / "config.json" + config_file.write_text(json.dumps({ + "baseUrl": "http://root:9000", + "hosts": {"hermes": {"baseUrl": "http://host-block:9001"}}, + })) + + config = HonchoClientConfig.from_global_config(config_path=config_file) + assert config.base_url == "http://root:9000" + class TestResolveSessionName: def test_manual_override(self): From b1d05dfe8b93b3f4d13397f5a527ec84a5f3a5b1 Mon Sep 17 00:00:00 2001 From: Test Date: Fri, 20 Mar 2026 05:09:41 -0700 Subject: [PATCH 4/5] fix(openai): route api.openai.com to Responses API for GPT-5.x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on PR #1859 by @magi-morph (too stale to cherry-pick, reimplemented). GPT-5.x models reject tool calls + reasoning_effort on /v1/chat/completions with a 400 error directing to /v1/responses. This auto-detects api.openai.com in the base URL and switches to codex_responses mode in three places: - AIAgent.__init__: upgrades chat_completions → codex_responses - _try_activate_fallback(): same routing for fallback model - runtime_provider.py: _detect_api_mode_for_url() for both custom provider and openrouter runtime resolution paths Also extracts _is_direct_openai_url() helper to replace the inline check in _max_tokens_param(). --- hermes_cli/runtime_provider.py | 20 ++++++++++++++++++-- run_agent.py | 21 +++++++++++++++------ 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index b00db5cf125..8c2979b6bca 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -24,6 +24,18 @@ def _normalize_custom_provider_name(value: str) -> str: return value.strip().lower().replace(" ", "-") +def _detect_api_mode_for_url(base_url: str) -> Optional[str]: + """Auto-detect api_mode from the resolved base URL. + + Direct api.openai.com endpoints need the Responses API for GPT-5.x + tool calls with reasoning (chat/completions returns 400). + """ + normalized = (base_url or "").strip().lower().rstrip("/") + if "api.openai.com" in normalized and "openrouter" not in normalized: + return "codex_responses" + return None + + def _auto_detect_local_model(base_url: str) -> str: """Query a local server for its model name when only one model is loaded.""" if not base_url: @@ -185,7 +197,9 @@ def _resolve_named_custom_runtime( return { "provider": "openrouter", - "api_mode": custom_provider.get("api_mode", "chat_completions"), + "api_mode": custom_provider.get("api_mode") + or _detect_api_mode_for_url(base_url) + or "chat_completions", "base_url": base_url, "api_key": api_key, "source": f"custom_provider:{custom_provider.get('name', requested_provider)}", @@ -263,7 +277,9 @@ def _resolve_openrouter_runtime( return { "provider": "openrouter", - "api_mode": _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions", + "api_mode": _parse_api_mode(model_cfg.get("api_mode")) + or _detect_api_mode_for_url(base_url) + or "chat_completions", "base_url": base_url, "api_key": api_key, "source": source, diff --git a/run_agent.py b/run_agent.py index cb0855f8786..e8365639bc2 100644 --- a/run_agent.py +++ b/run_agent.py @@ -501,6 +501,12 @@ class AIAgent: else: self.api_mode = "chat_completions" + # Direct OpenAI sessions use the Responses API path. GPT-5.x tool + # calls with reasoning are rejected on /v1/chat/completions, and + # Hermes is a tool-using client by default. + if self.api_mode == "chat_completions" and self._is_direct_openai_url(): + self.api_mode = "codex_responses" + # Pre-warm OpenRouter model metadata cache in a background thread. # fetch_model_metadata() is cached for 1 hour; this avoids a blocking # HTTP request on the first API response when pricing is estimated. @@ -1080,6 +1086,11 @@ class AIAgent: return self._safe_print(*args, **kwargs) + def _is_direct_openai_url(self, base_url: str = None) -> bool: + """Return True when a base URL targets OpenAI's native API.""" + url = (base_url or self._base_url_lower).lower() + return "api.openai.com" in url and "openrouter" not in url + def _max_tokens_param(self, value: int) -> dict: """Return the correct max tokens kwarg for the current provider. @@ -1087,11 +1098,7 @@ class AIAgent: 'max_completion_tokens'. OpenRouter, local models, and older OpenAI models use 'max_tokens'. """ - _is_direct_openai = ( - "api.openai.com" in self._base_url_lower - and "openrouter" not in self._base_url_lower - ) - if _is_direct_openai: + if self._is_direct_openai_url(): return {"max_completion_tokens": value} return {"max_tokens": value} @@ -3553,13 +3560,15 @@ class AIAgent: fb_provider) return False - # Determine api_mode from provider + # Determine api_mode from provider / base URL fb_api_mode = "chat_completions" fb_base_url = str(fb_client.base_url) if fb_provider == "openai-codex": fb_api_mode = "codex_responses" elif fb_provider == "anthropic" or fb_base_url.rstrip("/").lower().endswith("/anthropic"): fb_api_mode = "anthropic_messages" + elif self._is_direct_openai_url(fb_base_url): + fb_api_mode = "codex_responses" old_model = self.model self.model = fb_model From 4494c0b033439ef0a55353ddd529f02eaeca8c52 Mon Sep 17 00:00:00 2001 From: Test Date: Fri, 20 Mar 2026 05:18:05 -0700 Subject: [PATCH 5/5] fix(cron): remove send_message/clarify from cron agents + autonomous prompt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cron jobs run unattended with no user present. Previously the agent had send_message and clarify tools available, which makes no sense — the final response is auto-delivered, and there's nobody to ask questions to. Changes: - Disable messaging and clarify toolsets for cron agent sessions - Update cron platform hint to emphasize autonomous execution: no user present, cannot ask questions, must execute fully and make decisions - Update cronjob tool schema description to match (remove stale send_message guidance) --- agent/prompt_builder.py | 10 +++++----- cron/scheduler.py | 2 +- tools/cronjob_tools.py | 8 +++----- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index b9a415c1d3b..a427863047e 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -206,11 +206,11 @@ PLATFORM_HINTS = { "contextually appropriate." ), "cron": ( - "You are running as a scheduled cron job. Your final response is automatically " - "delivered to the job's configured destination, so do not use send_message to " - "send to that same target again. If you want the user to receive something in " - "the scheduled destination, put it directly in your final response. Use " - "send_message only for additional or different targets." + "You are running as a scheduled cron job. There is no user present — you " + "cannot ask questions, request clarification, or wait for follow-up. Execute " + "the task fully and autonomously, making reasonable decisions where needed. " + "Your final response is automatically delivered to the job's configured " + "destination — put the primary content directly in your response." ), "cli": ( "You are a CLI AI Agent. Try not to use markdown but simple text " diff --git a/cron/scheduler.py b/cron/scheduler.py index e996df07956..417c3eb4373 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -391,7 +391,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: providers_ignored=pr.get("ignore"), providers_order=pr.get("order"), provider_sort=pr.get("sort"), - disabled_toolsets=["cronjob"], + disabled_toolsets=["cronjob", "messaging", "clarify"], quiet_mode=True, platform="cron", session_id=f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}", diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py index 74b958a56d1..0cb1dc19dd3 100644 --- a/tools/cronjob_tools.py +++ b/tools/cronjob_tools.py @@ -336,11 +336,9 @@ Jobs run in a fresh session with no current-chat context, so prompts must be sel If skill or skills are provided on create, the future cron run loads those skills in order, then follows the prompt as the task instruction. On update, passing skills=[] clears attached skills. -NOTE: The agent's final response is auto-delivered to the target — do NOT use -send_message in the prompt for that same destination. Same-target send_message -calls are skipped to avoid duplicate cron deliveries. Put the primary -user-facing content in the final response, and use send_message only for -additional or different targets. +NOTE: The agent's final response is auto-delivered to the target. Put the primary +user-facing content in the final response. Cron jobs run autonomously with no user +present — they cannot ask questions or request clarification. Important safety rule: cron-run sessions should not recursively schedule more cron jobs.""", "parameters": {