Compare commits

...

11 Commits

Author SHA1 Message Date
teyrebaz33
9c2ec6a2d9 fix: auto-reload MCP tools when mcp_servers config changes without restart
Fixes #1036

After adding an MCP server to config.yaml, users had to restart Hermes
before the new tools became visible — even though /reload-mcp existed.

Add _check_config_mcp_changes() called from process_loop every 5s:
- stat() config.yaml for mtime changes (fast path, no YAML parse)
- On mtime change, parse and compare mcp_servers section
- If mcp_servers changed, auto-trigger _reload_mcp() and notify user
- Skip check while agent is running to avoid interrupting tool calls
- Throttled to CONFIG_WATCH_INTERVAL=5s to avoid busy-polling

/reload-mcp still works for manual force-reload.

Tests: 6 new tests in TestMCPConfigWatch, all passed
2026-03-15 19:01:52 -07:00
Teknium
471c663fdf fix(cli): silence tirith prefetch install warnings at startup (#1452) 2026-03-15 18:07:03 -07:00
Teknium
64d333204b Merge pull request #1242 from NousResearch/fix/file-tool-log-noise
fix: reduce file tool log noise
2026-03-15 11:11:18 -07:00
Teknium
c44af43840 Merge pull request #1401 from NousResearch/hermes/hermes-eca4a640
test: protect atomic temp cleanup on interrupts
2026-03-15 11:10:41 -07:00
Teknium
934fc9df22 Merge pull request #1440 from NousResearch/fix/1071-dict-tool-args
fix: handle dict tool call arguments from local backends
2026-03-15 08:04:09 -07:00
teknium1
5847c180c6 test: restore vllm integration coverage and add dict-args regression
Restore the existing vLLM integration test module that was accidentally replaced during development and add a focused agent-loop regression test for dict tool-call arguments from OpenAI-compatible local backends.
2026-03-15 08:02:29 -07:00
teknium1
93a0c0cddd fix: handle dict tool call arguments from local backends
Normalize tool call arguments when OpenAI-compatible backends return parsed dict/list payloads instead of JSON strings. This prevents the .strip() crash during tool-call validation for llama.cpp and similar servers, while preserving existing empty-string and invalid-JSON handling. Adds a focused regression test for dict arguments in the agent loop.
2026-03-15 08:00:19 -07:00
Teknium
23e8fdd167 feat(discord): auto-thread on @mention + skip mention in bot threads
Two changes to align Discord behavior with Slack:

1. Auto-thread on @mention (default: true)
   - When someone @mentions the bot in a server channel, a thread is
     automatically created from their message and the response goes there.
   - Each thread gets its own isolated session (like Slack).
   - Configurable via discord.auto_thread in config.yaml (default: true)
     or DISCORD_AUTO_THREAD env var (env takes precedence).
   - DMs and existing threads are unaffected.

2. Skip @mention in bot-participated threads
   - Once the bot has responded in a thread (auto-created or manually
     entered), subsequent messages in that thread no longer require
     @mention. Users can just type normally.
   - Tracked via in-memory set (_bot_participated_threads). After a
     gateway restart, users need to @mention once to re-establish.
   - Threads the bot hasn't participated in still require @mention.

Config change:
   discord:
     auto_thread: true  # new, added to DEFAULT_CONFIG

Tests: 7 new tests covering auto-thread default, disable, bot thread
participation tracking, and mention skip logic. All 903 gateway tests pass.
2026-03-15 07:59:55 -07:00
Teknium
3268b98779 Merge pull request #1437 from NousResearch/fix/1219-cron-thread-context
fix: preserve thread context for cronjob deliver=origin
2026-03-15 06:58:37 -07:00
teknium1
b117bbc125 test: cover atomic temp cleanup on interrupts
- add regression coverage for BaseException cleanup in atomic_json_write
- add dedicated atomic_yaml_write tests, including interrupt cleanup
- document why BaseException is intentional in both helpers
2026-03-14 22:31:51 -07:00
teknium1
b59da08730 fix: reduce file tool log noise
- treat git diff --cached --quiet rc=1 as an expected checkpoint state
  instead of logging it as an error
- downgrade expected write PermissionError/EROFS/EACCES failures out of
  error logging while keeping unexpected exceptions at error level
- add regression tests for both logging behaviors
2026-03-13 22:14:00 -07:00
18 changed files with 590 additions and 35 deletions

61
cli.py
View File

@@ -3484,6 +3484,56 @@ class HermesCLI:
except Exception as e:
print(f" Error generating insights: {e}")
def _check_config_mcp_changes(self) -> None:
"""Detect mcp_servers changes in config.yaml and auto-reload MCP connections.
Called from process_loop every CONFIG_WATCH_INTERVAL seconds.
Compares config.yaml mtime + mcp_servers section against the last
known state. When a change is detected, triggers _reload_mcp() and
informs the user so they know the tool list has been refreshed.
"""
import time
import yaml as _yaml
CONFIG_WATCH_INTERVAL = 5.0 # seconds between config.yaml stat() calls
now = time.monotonic()
if now - self._last_config_check < CONFIG_WATCH_INTERVAL:
return
self._last_config_check = now
from hermes_cli.config import get_config_path as _get_config_path
cfg_path = _get_config_path()
if not cfg_path.exists():
return
try:
mtime = cfg_path.stat().st_mtime
except OSError:
return
if mtime == self._config_mtime:
return # File unchanged — fast path
# File changed — check whether mcp_servers section changed
self._config_mtime = mtime
try:
with open(cfg_path, encoding="utf-8") as f:
new_cfg = _yaml.safe_load(f) or {}
except Exception:
return
new_mcp = new_cfg.get("mcp_servers") or {}
if new_mcp == self._config_mcp_servers:
return # mcp_servers unchanged (some other section was edited)
self._config_mcp_servers = new_mcp
# Notify user and reload
print()
print("🔄 MCP server config changed — reloading connections...")
with self._busy_command(self._slow_command_status("/reload-mcp")):
self._reload_mcp()
def _reload_mcp(self):
"""Reload MCP servers: disconnect all, re-read config.yaml, reconnect.
@@ -4749,6 +4799,12 @@ class HermesCLI:
self._interrupt_queue = queue.Queue() # For messages typed while agent is running
self._should_exit = False
self._last_ctrl_c_time = 0 # Track double Ctrl+C for force exit
# Config file watcher — detect mcp_servers changes and auto-reload
from hermes_cli.config import get_config_path as _get_config_path
_cfg_path = _get_config_path()
self._config_mtime: float = _cfg_path.stat().st_mtime if _cfg_path.exists() else 0.0
self._config_mcp_servers: dict = self.config.get("mcp_servers") or {}
self._last_config_check: float = 0.0 # monotonic time of last check
# Clarify tool state: interactive question/answer with the user.
# When the agent calls the clarify tool, _clarify_state is set and
@@ -4797,7 +4853,7 @@ class HermesCLI:
# Ensure tirith security scanner is available (downloads if needed)
try:
from tools.tirith_security import ensure_installed
ensure_installed()
ensure_installed(log_failures=False)
except Exception:
pass # Non-fatal — fail-open at scan time if unavailable
@@ -5682,6 +5738,9 @@ class HermesCLI:
try:
user_input = self._pending_input.get(timeout=0.1)
except queue.Empty:
# Periodic config watcher — auto-reload MCP on mcp_servers change
if not self._agent_running:
self._check_config_mcp_changes()
continue
if not user_input:

View File

@@ -433,6 +433,9 @@ class DiscordAdapter(BasePlatformAdapter):
self._voice_listen_tasks: Dict[int, asyncio.Task] = {} # guild_id -> listen loop
self._voice_input_callback: Optional[Callable] = None # set by run.py
self._on_voice_disconnect: Optional[Callable] = None # set by run.py
# Track threads where the bot has participated so follow-up messages
# in those threads don't require @mention.
self._bot_participated_threads: set = set()
async def connect(self) -> bool:
"""Connect to Discord and start receiving events."""
@@ -613,7 +616,7 @@ class DiscordAdapter(BasePlatformAdapter):
"""Send a message to a Discord channel."""
if not self._client:
return SendResult(success=False, error="Not connected")
try:
# Get the channel
channel = self._client.get_channel(int(chat_id))
@@ -1798,14 +1801,13 @@ class DiscordAdapter(BasePlatformAdapter):
async def _handle_message(self, message: DiscordMessage) -> None:
"""Handle incoming Discord messages."""
# In server channels (not DMs), require the bot to be @mentioned
# UNLESS the channel is in the free-response list.
# UNLESS the channel is in the free-response list or the message is
# in a thread where the bot has already participated.
#
# Config:
# DISCORD_FREE_RESPONSE_CHANNELS: Comma-separated channel IDs where the
# bot responds to every message without needing a mention.
# DISCORD_REQUIRE_MENTION: Set to "false" to disable mention requirement
# globally (all channels become free-response). Default: "true".
# Can also be set via discord.require_mention in config.yaml.
# Config (all settable via discord.* in config.yaml):
# discord.require_mention: Require @mention in server channels (default: true)
# discord.free_response_channels: Channel IDs where bot responds without mention
# discord.auto_thread: Auto-create thread on @mention in channels (default: true)
thread_id = None
parent_channel_id = None
@@ -1824,7 +1826,11 @@ class DiscordAdapter(BasePlatformAdapter):
require_mention = os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no")
is_free_channel = bool(channel_ids & free_channels)
if require_mention and not is_free_channel:
# Skip the mention check if the message is in a thread where
# the bot has previously participated (auto-created or replied in).
in_bot_thread = is_thread and thread_id in self._bot_participated_threads
if require_mention and not is_free_channel and not in_bot_thread:
if self._client.user not in message.mentions:
return
@@ -1833,17 +1839,18 @@ class DiscordAdapter(BasePlatformAdapter):
message.content = message.content.replace(f"<@!{self._client.user.id}>", "").strip()
# Auto-thread: when enabled, automatically create a thread for every
# new message in a text channel so each conversation is isolated.
# @mention in a text channel so each conversation is isolated (like Slack).
# Messages already inside threads or DMs are unaffected.
auto_threaded_channel = None
if not is_thread and not isinstance(message.channel, discord.DMChannel):
auto_thread = os.getenv("DISCORD_AUTO_THREAD", "").lower() in ("true", "1", "yes")
auto_thread = os.getenv("DISCORD_AUTO_THREAD", "true").lower() in ("true", "1", "yes")
if auto_thread:
thread = await self._auto_create_thread(message)
if thread:
is_thread = True
thread_id = str(thread.id)
auto_threaded_channel = thread
self._bot_participated_threads.add(thread_id)
# Determine message type
msg_type = MessageType.TEXT
@@ -1943,7 +1950,12 @@ class DiscordAdapter(BasePlatformAdapter):
reply_to_message_id=str(message.reference.message_id) if message.reference else None,
timestamp=message.created_at,
)
# Track thread participation so the bot won't require @mention for
# follow-up messages in threads it has already engaged in.
if thread_id:
self._bot_participated_threads.add(thread_id)
await self.handle_message(event)

View File

@@ -305,7 +305,7 @@ class GatewayRunner:
# Ensure tirith security scanner is available (downloads if needed)
try:
from tools.tirith_security import ensure_installed
ensure_installed()
ensure_installed(log_failures=False)
except Exception:
pass # Non-fatal — fail-open at scan time if unavailable

View File

@@ -280,6 +280,7 @@ DEFAULT_CONFIG = {
"discord": {
"require_mention": True, # Require @mention to respond in server channels
"free_response_channels": "", # Comma-separated channel IDs where bot responds without mention
"auto_thread": True, # Auto-create threads on @mention in channels (like Slack)
},
# Permanently allowed dangerous command patterns (added via "always" approval)

View File

@@ -5582,6 +5582,12 @@ class AIAgent:
invalid_json_args = []
for tc in assistant_message.tool_calls:
args = tc.function.arguments
if isinstance(args, (dict, list)):
tc.function.arguments = json.dumps(args)
continue
if args is not None and not isinstance(args, str):
tc.function.arguments = str(args)
args = tc.function.arguments
# Treat empty/whitespace strings as empty object
if not args or not args.strip():
tc.function.arguments = "{}"

View File

@@ -252,3 +252,109 @@ async def test_discord_dms_ignore_mention_requirement(adapter, monkeypatch):
event = adapter.handle_message.await_args.args[0]
assert event.text == "dm without mention"
assert event.source.chat_type == "dm"
@pytest.mark.asyncio
async def test_discord_auto_thread_enabled_by_default(adapter, monkeypatch):
"""Auto-threading should be enabled by default (DISCORD_AUTO_THREAD defaults to 'true')."""
monkeypatch.delenv("DISCORD_AUTO_THREAD", raising=False)
monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")
# Patch _auto_create_thread to return a fake thread
fake_thread = FakeThread(channel_id=999, name="auto-thread")
adapter._auto_create_thread = AsyncMock(return_value=fake_thread)
message = make_message(channel=FakeTextChannel(channel_id=123), content="hello")
await adapter._handle_message(message)
adapter._auto_create_thread.assert_awaited_once()
adapter.handle_message.assert_awaited_once()
event = adapter.handle_message.await_args.args[0]
assert event.source.chat_type == "thread"
assert event.source.thread_id == "999"
@pytest.mark.asyncio
async def test_discord_auto_thread_can_be_disabled(adapter, monkeypatch):
"""Setting auto_thread to false skips thread creation."""
monkeypatch.setenv("DISCORD_AUTO_THREAD", "false")
monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")
adapter._auto_create_thread = AsyncMock()
message = make_message(channel=FakeTextChannel(channel_id=123), content="hello")
await adapter._handle_message(message)
adapter._auto_create_thread.assert_not_awaited()
adapter.handle_message.assert_awaited_once()
event = adapter.handle_message.await_args.args[0]
assert event.source.chat_type == "group"
@pytest.mark.asyncio
async def test_discord_bot_thread_skips_mention_requirement(adapter, monkeypatch):
"""Messages in a thread the bot has participated in should not require @mention."""
monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true")
monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False)
monkeypatch.setenv("DISCORD_AUTO_THREAD", "false")
# Simulate bot having previously participated in thread 456
adapter._bot_participated_threads.add("456")
thread = FakeThread(channel_id=456, name="existing thread")
message = make_message(channel=thread, content="follow-up without mention")
await adapter._handle_message(message)
adapter.handle_message.assert_awaited_once()
event = adapter.handle_message.await_args.args[0]
assert event.text == "follow-up without mention"
assert event.source.chat_type == "thread"
@pytest.mark.asyncio
async def test_discord_unknown_thread_still_requires_mention(adapter, monkeypatch):
"""Messages in a thread the bot hasn't participated in should still require @mention."""
monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true")
monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False)
monkeypatch.setenv("DISCORD_AUTO_THREAD", "false")
# Bot has NOT participated in thread 789
thread = FakeThread(channel_id=789, name="some thread")
message = make_message(channel=thread, content="hello from unknown thread")
await adapter._handle_message(message)
adapter.handle_message.assert_not_awaited()
@pytest.mark.asyncio
async def test_discord_auto_thread_tracks_participation(adapter, monkeypatch):
"""Auto-created threads should be tracked for future mention-free replies."""
monkeypatch.delenv("DISCORD_AUTO_THREAD", raising=False)
monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")
fake_thread = FakeThread(channel_id=555, name="auto-thread")
adapter._auto_create_thread = AsyncMock(return_value=fake_thread)
message = make_message(channel=FakeTextChannel(channel_id=123), content="start a thread")
await adapter._handle_message(message)
assert "555" in adapter._bot_participated_threads
@pytest.mark.asyncio
async def test_discord_thread_participation_tracked_on_dispatch(adapter, monkeypatch):
"""When the bot processes a message in a thread, it tracks participation."""
monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")
monkeypatch.setenv("DISCORD_AUTO_THREAD", "false")
thread = FakeThread(channel_id=777, name="manually created thread")
message = make_message(channel=thread, content="hello in thread")
await adapter._handle_message(message)
assert "777" in adapter._bot_participated_threads

View File

@@ -363,11 +363,37 @@ async def test_auto_thread_creates_thread_and_redirects(adapter, monkeypatch):
@pytest.mark.asyncio
async def test_auto_thread_disabled_by_default(adapter, monkeypatch):
"""Without DISCORD_AUTO_THREAD, messages stay in the channel."""
async def test_auto_thread_enabled_by_default_slash_commands(adapter, monkeypatch):
"""Without DISCORD_AUTO_THREAD env var, auto-threading is enabled (default: true)."""
monkeypatch.delenv("DISCORD_AUTO_THREAD", raising=False)
monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")
fake_thread = _FakeThreadChannel(channel_id=999, name="auto-thread")
adapter._auto_create_thread = AsyncMock(return_value=fake_thread)
captured_events = []
async def capture_handle(event):
captured_events.append(event)
adapter.handle_message = capture_handle
msg = _fake_message(_FakeTextChannel())
await adapter._handle_message(msg)
adapter._auto_create_thread.assert_awaited_once()
assert len(captured_events) == 1
assert captured_events[0].source.chat_id == "999" # redirected to thread
assert captured_events[0].source.chat_type == "thread"
@pytest.mark.asyncio
async def test_auto_thread_can_be_disabled(adapter, monkeypatch):
"""Setting DISCORD_AUTO_THREAD=false keeps messages in the channel."""
monkeypatch.setenv("DISCORD_AUTO_THREAD", "false")
monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")
adapter._auto_create_thread = AsyncMock()
captured_events = []

View File

@@ -68,6 +68,22 @@ class TestAtomicJsonWrite:
tmp_files = [f for f in tmp_path.iterdir() if ".tmp" in f.name]
assert len(tmp_files) == 0
def test_cleans_up_temp_file_on_baseexception(self, tmp_path):
class SimulatedAbort(BaseException):
pass
target = tmp_path / "data.json"
original = {"preserved": True}
target.write_text(json.dumps(original), encoding="utf-8")
with patch("utils.json.dump", side_effect=SimulatedAbort):
with pytest.raises(SimulatedAbort):
atomic_json_write(target, {"new": True})
tmp_files = [f for f in tmp_path.iterdir() if ".tmp" in f.name]
assert len(tmp_files) == 0
assert json.loads(target.read_text(encoding="utf-8")) == original
def test_accepts_string_path(self, tmp_path):
target = str(tmp_path / "string_path.json")
atomic_json_write(target, {"string": True})

View File

@@ -0,0 +1,44 @@
"""Tests for utils.atomic_yaml_write — crash-safe YAML file writes."""
from pathlib import Path
from unittest.mock import patch
import pytest
import yaml
from utils import atomic_yaml_write
class TestAtomicYamlWrite:
def test_writes_valid_yaml(self, tmp_path):
target = tmp_path / "data.yaml"
data = {"key": "value", "nested": {"a": 1}}
atomic_yaml_write(target, data)
assert yaml.safe_load(target.read_text(encoding="utf-8")) == data
def test_cleans_up_temp_file_on_baseexception(self, tmp_path):
class SimulatedAbort(BaseException):
pass
target = tmp_path / "data.yaml"
original = {"preserved": True}
target.write_text(yaml.safe_dump(original), encoding="utf-8")
with patch("utils.yaml.dump", side_effect=SimulatedAbort):
with pytest.raises(SimulatedAbort):
atomic_yaml_write(target, {"new": True})
tmp_files = [f for f in tmp_path.iterdir() if ".tmp" in f.name]
assert len(tmp_files) == 0
assert yaml.safe_load(target.read_text(encoding="utf-8")) == original
def test_appends_extra_content(self, tmp_path):
target = tmp_path / "data.yaml"
atomic_yaml_write(target, {"key": "value"}, extra_content="\n# comment\n")
text = target.read_text(encoding="utf-8")
assert "key: value" in text
assert "# comment" in text

View File

@@ -0,0 +1,103 @@
"""Tests for automatic MCP reload when config.yaml mcp_servers section changes."""
import time
from pathlib import Path
from unittest.mock import MagicMock, patch
def _make_cli(tmp_path, mcp_servers=None):
"""Create a minimal HermesCLI instance with mocked config."""
import cli as cli_mod
obj = object.__new__(cli_mod.HermesCLI)
obj.config = {"mcp_servers": mcp_servers or {}}
obj._agent_running = False
obj._last_config_check = 0.0
obj._config_mcp_servers = mcp_servers or {}
cfg_file = tmp_path / "config.yaml"
cfg_file.write_text("mcp_servers: {}\n")
obj._config_mtime = cfg_file.stat().st_mtime
obj._reload_mcp = MagicMock()
obj._busy_command = MagicMock()
obj._busy_command.return_value.__enter__ = MagicMock(return_value=None)
obj._busy_command.return_value.__exit__ = MagicMock(return_value=False)
obj._slow_command_status = MagicMock(return_value="reloading...")
return obj, cfg_file
class TestMCPConfigWatch:
def test_no_change_does_not_reload(self, tmp_path):
"""If mtime and mcp_servers unchanged, _reload_mcp is NOT called."""
obj, cfg_file = _make_cli(tmp_path)
with patch("hermes_cli.config.get_config_path", return_value=cfg_file):
obj._check_config_mcp_changes()
obj._reload_mcp.assert_not_called()
def test_mtime_change_with_same_mcp_servers_does_not_reload(self, tmp_path):
"""If file mtime changes but mcp_servers is identical, no reload."""
import yaml
obj, cfg_file = _make_cli(tmp_path, mcp_servers={"fs": {"command": "npx"}})
# Write same mcp_servers but touch the file
cfg_file.write_text(yaml.dump({"mcp_servers": {"fs": {"command": "npx"}}}))
# Force mtime to appear changed
obj._config_mtime = 0.0
with patch("hermes_cli.config.get_config_path", return_value=cfg_file):
obj._check_config_mcp_changes()
obj._reload_mcp.assert_not_called()
def test_new_mcp_server_triggers_reload(self, tmp_path):
"""Adding a new MCP server to config triggers auto-reload."""
import yaml
obj, cfg_file = _make_cli(tmp_path, mcp_servers={})
# Simulate user adding a new MCP server to config.yaml
cfg_file.write_text(yaml.dump({"mcp_servers": {"github": {"url": "https://mcp.github.com"}}}))
obj._config_mtime = 0.0 # force stale mtime
with patch("hermes_cli.config.get_config_path", return_value=cfg_file):
obj._check_config_mcp_changes()
obj._reload_mcp.assert_called_once()
def test_removed_mcp_server_triggers_reload(self, tmp_path):
"""Removing an MCP server from config triggers auto-reload."""
import yaml
obj, cfg_file = _make_cli(tmp_path, mcp_servers={"github": {"url": "https://mcp.github.com"}})
# Simulate user removing the server
cfg_file.write_text(yaml.dump({"mcp_servers": {}}))
obj._config_mtime = 0.0
with patch("hermes_cli.config.get_config_path", return_value=cfg_file):
obj._check_config_mcp_changes()
obj._reload_mcp.assert_called_once()
def test_interval_throttle_skips_check(self, tmp_path):
"""If called within CONFIG_WATCH_INTERVAL, stat() is skipped."""
obj, cfg_file = _make_cli(tmp_path)
obj._last_config_check = time.monotonic() # just checked
with patch("hermes_cli.config.get_config_path", return_value=cfg_file), \
patch.object(Path, "stat") as mock_stat:
obj._check_config_mcp_changes()
mock_stat.assert_not_called()
obj._reload_mcp.assert_not_called()
def test_missing_config_file_does_not_crash(self, tmp_path):
"""If config.yaml doesn't exist, _check_config_mcp_changes is a no-op."""
obj, cfg_file = _make_cli(tmp_path)
missing = tmp_path / "nonexistent.yaml"
with patch("hermes_cli.config.get_config_path", return_value=missing):
obj._check_config_mcp_changes() # should not raise
obj._reload_mcp.assert_not_called()

View File

@@ -0,0 +1,72 @@
import json
from types import SimpleNamespace
def _tool_call(name: str, arguments):
return SimpleNamespace(
id="call_1",
type="function",
function=SimpleNamespace(name=name, arguments=arguments),
)
def _response_with_tool_call(arguments):
assistant = SimpleNamespace(
content=None,
reasoning=None,
tool_calls=[_tool_call("read_file", arguments)],
)
choice = SimpleNamespace(message=assistant, finish_reason="tool_calls")
return SimpleNamespace(choices=[choice], usage=None)
class _FakeChatCompletions:
def __init__(self):
self.calls = 0
def create(self, **kwargs):
self.calls += 1
if self.calls == 1:
return _response_with_tool_call({"path": "README.md"})
return SimpleNamespace(
choices=[
SimpleNamespace(
message=SimpleNamespace(content="done", reasoning=None, tool_calls=[]),
finish_reason="stop",
)
],
usage=None,
)
class _FakeClient:
def __init__(self):
self.chat = SimpleNamespace(completions=_FakeChatCompletions())
def test_tool_call_validation_accepts_dict_arguments(monkeypatch):
from run_agent import AIAgent
monkeypatch.setattr("run_agent.OpenAI", lambda **kwargs: _FakeClient())
monkeypatch.setattr(
"run_agent.get_tool_definitions",
lambda *args, **kwargs: [{"function": {"name": "read_file"}}],
)
monkeypatch.setattr(
"run_agent.handle_function_call",
lambda name, args, task_id=None, **kwargs: json.dumps({"ok": True, "args": args}),
)
agent = AIAgent(
model="test-model",
api_key="test-key",
base_url="http://localhost:8080/v1",
platform="cli",
max_iterations=3,
quiet_mode=True,
skip_memory=True,
)
result = agent.run_conversation("read the file")
assert result["final_response"] == "done"

View File

@@ -1,8 +1,10 @@
"""Tests for tools/checkpoint_manager.py — CheckpointManager."""
import logging
import os
import json
import shutil
import subprocess
import pytest
from pathlib import Path
from unittest.mock import patch
@@ -143,6 +145,12 @@ class TestTakeCheckpoint:
result = mgr.ensure_checkpoint(str(work_dir), "initial")
assert result is True
def test_successful_checkpoint_does_not_log_expected_diff_exit(self, mgr, work_dir, caplog):
with caplog.at_level(logging.ERROR, logger="tools.checkpoint_manager"):
result = mgr.ensure_checkpoint(str(work_dir), "initial")
assert result is True
assert not any("diff --cached --quiet" in r.getMessage() for r in caplog.records)
def test_dedup_same_turn(self, mgr, work_dir):
r1 = mgr.ensure_checkpoint(str(work_dir), "first")
r2 = mgr.ensure_checkpoint(str(work_dir), "second")
@@ -375,6 +383,26 @@ class TestErrorResilience:
result = mgr.ensure_checkpoint(str(work_dir), "test")
assert result is False
def test_run_git_allows_expected_nonzero_without_error_log(self, tmp_path, caplog):
completed = subprocess.CompletedProcess(
args=["git", "diff", "--cached", "--quiet"],
returncode=1,
stdout="",
stderr="",
)
with patch("tools.checkpoint_manager.subprocess.run", return_value=completed):
with caplog.at_level(logging.ERROR, logger="tools.checkpoint_manager"):
ok, stdout, stderr = _run_git(
["diff", "--cached", "--quiet"],
tmp_path / "shadow",
str(tmp_path / "work"),
allowed_returncodes={1},
)
assert ok is False
assert stdout == ""
assert stderr == ""
assert not caplog.records
def test_checkpoint_failure_does_not_raise(self, mgr, work_dir, monkeypatch):
"""Checkpoint failures should never raise — they're silently logged."""
def broken_run_git(*args, **kwargs):

View File

@@ -5,6 +5,7 @@ handling without requiring a running terminal environment.
"""
import json
import logging
from unittest.mock import MagicMock, patch
from tools.file_tools import (
@@ -87,13 +88,26 @@ class TestWriteFileHandler:
mock_ops.write_file.assert_called_once_with("/tmp/out.txt", "hello world!\n")
@patch("tools.file_tools._get_file_ops")
def test_exception_returns_error_json(self, mock_get):
def test_permission_error_returns_error_json_without_error_log(self, mock_get, caplog):
mock_get.side_effect = PermissionError("read-only filesystem")
from tools.file_tools import write_file_tool
result = json.loads(write_file_tool("/tmp/out.txt", "data"))
with caplog.at_level(logging.DEBUG, logger="tools.file_tools"):
result = json.loads(write_file_tool("/tmp/out.txt", "data"))
assert "error" in result
assert "read-only" in result["error"]
assert any("write_file expected denial" in r.getMessage() for r in caplog.records)
assert not any(r.levelno >= logging.ERROR for r in caplog.records)
@patch("tools.file_tools._get_file_ops")
def test_unexpected_exception_still_logs_error(self, mock_get, caplog):
mock_get.side_effect = RuntimeError("boom")
from tools.file_tools import write_file_tool
with caplog.at_level(logging.ERROR, logger="tools.file_tools"):
result = json.loads(write_file_tool("/tmp/out.txt", "data"))
assert result["error"] == "boom"
assert any("write_file error" in r.getMessage() for r in caplog.records)
class TestPatchHandler:

View File

@@ -315,6 +315,23 @@ class TestEnsureInstalled:
mock_thread.start.assert_called_once()
_tirith_mod._resolved_path = None
@patch("tools.tirith_security._load_security_config")
def test_startup_prefetch_can_suppress_install_failure_logs(self, mock_cfg):
mock_cfg.return_value = {"tirith_enabled": True, "tirith_path": "tirith",
"tirith_timeout": 5, "tirith_fail_open": True}
_tirith_mod._resolved_path = None
with patch("tools.tirith_security.shutil.which", return_value=None), \
patch("tools.tirith_security._hermes_bin_dir", return_value="/nonexistent"), \
patch("tools.tirith_security._is_install_failed_on_disk", return_value=False), \
patch("tools.tirith_security.threading.Thread") as MockThread:
mock_thread = MagicMock()
MockThread.return_value = mock_thread
result = ensure_installed(log_failures=False)
assert result is None
assert MockThread.call_args.kwargs["kwargs"] == {"log_failures": False}
mock_thread.start.assert_called_once()
_tirith_mod._resolved_path = None
# ---------------------------------------------------------------------------
# Failed download caches the miss (Finding #1)
@@ -516,6 +533,22 @@ class TestCosignVerification:
assert path is None
assert reason == "cosign_missing"
@patch("tools.tirith_security.logger.debug")
@patch("tools.tirith_security.logger.warning")
@patch("tools.tirith_security.shutil.which", return_value=None)
@patch("tools.tirith_security._download_file")
@patch("tools.tirith_security._detect_target", return_value="aarch64-apple-darwin")
def test_install_quiet_mode_downgrades_cosign_missing_log(self, mock_target, mock_dl,
mock_which, mock_warning,
mock_debug):
"""Startup prefetch should not surface cosign-missing as a warning."""
from tools.tirith_security import _install_tirith
path, reason = _install_tirith(log_failures=False)
assert path is None
assert reason == "cosign_missing"
mock_warning.assert_not_called()
mock_debug.assert_called()
@patch("tools.tirith_security._verify_cosign", return_value=None)
@patch("tools.tirith_security.shutil.which", return_value="/usr/local/bin/cosign")
@patch("tools.tirith_security._download_file")

View File

@@ -92,10 +92,17 @@ def _run_git(
shadow_repo: Path,
working_dir: str,
timeout: int = _GIT_TIMEOUT,
allowed_returncodes: Optional[Set[int]] = None,
) -> tuple:
"""Run a git command against the shadow repo. Returns (ok, stdout, stderr)."""
"""Run a git command against the shadow repo. Returns (ok, stdout, stderr).
``allowed_returncodes`` suppresses error logging for known/expected non-zero
exits while preserving the normal ``ok = (returncode == 0)`` contract.
Example: ``git diff --cached --quiet`` returns 1 when changes exist.
"""
env = _git_env(shadow_repo, working_dir)
cmd = ["git"] + list(args)
allowed_returncodes = allowed_returncodes or set()
try:
result = subprocess.run(
cmd,
@@ -108,7 +115,7 @@ def _run_git(
ok = result.returncode == 0
stdout = result.stdout.strip()
stderr = result.stderr.strip()
if not ok:
if not ok and result.returncode not in allowed_returncodes:
logger.error(
"Git command failed: %s (rc=%d) stderr=%s",
" ".join(cmd), result.returncode, stderr,
@@ -381,7 +388,10 @@ class CheckpointManager:
# Check if there's anything to commit
ok_diff, diff_out, _ = _run_git(
["diff", "--cached", "--quiet"], shadow, working_dir,
["diff", "--cached", "--quiet"],
shadow,
working_dir,
allowed_returncodes={1},
)
if ok_diff:
# No changes to commit

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python3
"""File Tools Module - LLM agent file manipulation tools."""
import errno
import json
import logging
import os
@@ -11,6 +12,18 @@ from agent.redact import redact_sensitive_text
logger = logging.getLogger(__name__)
_EXPECTED_WRITE_ERRNOS = {errno.EACCES, errno.EPERM, errno.EROFS}
def _is_expected_write_exception(exc: Exception) -> bool:
"""Return True for expected write denials that should not hit error logs."""
if isinstance(exc, PermissionError):
return True
if isinstance(exc, OSError) and exc.errno in _EXPECTED_WRITE_ERRNOS:
return True
return False
_file_ops_lock = threading.Lock()
_file_ops_cache: dict = {}
@@ -238,7 +251,10 @@ def write_file_tool(path: str, content: str, task_id: str = "default") -> str:
result = file_ops.write_file(path, content)
return json.dumps(result.to_dict(), ensure_ascii=False)
except Exception as e:
logger.error("write_file error: %s: %s", type(e).__name__, e)
if _is_expected_write_exception(e):
logger.debug("write_file expected denial: %s: %s", type(e).__name__, e)
else:
logger.error("write_file error: %s: %s", type(e).__name__, e, exc_info=True)
return json.dumps({"error": str(e)}, ensure_ascii=False)

View File

@@ -279,7 +279,7 @@ def _verify_checksum(archive_path: str, checksums_path: str, archive_name: str)
return True
def _install_tirith() -> tuple[str | None, str]:
def _install_tirith(*, log_failures: bool = True) -> tuple[str | None, str]:
"""Download and install tirith to $HERMES_HOME/bin/tirith.
Verifies provenance via cosign and SHA-256 checksum.
@@ -287,6 +287,8 @@ def _install_tirith() -> tuple[str | None, str]:
failure_reason is a short tag used by the disk marker to decide if the
failure is retryable (e.g. "cosign_missing" clears when cosign appears).
"""
log = logger.warning if log_failures else logger.debug
target = _detect_target()
if not target:
logger.info("tirith auto-install: unsupported platform %s/%s",
@@ -309,7 +311,7 @@ def _install_tirith() -> tuple[str | None, str]:
_download_file(f"{base_url}/{archive_name}", archive_path)
_download_file(f"{base_url}/checksums.txt", checksums_path)
except Exception as exc:
logger.warning("tirith download failed: %s", exc)
log("tirith download failed: %s", exc)
return None, "download_failed"
# Cosign provenance verification is mandatory for auto-install.
@@ -320,25 +322,25 @@ def _install_tirith() -> tuple[str | None, str]:
_download_file(f"{base_url}/checksums.txt.sig", sig_path)
_download_file(f"{base_url}/checksums.txt.pem", cert_path)
except Exception as exc:
logger.warning("tirith install skipped: cosign artifacts unavailable (%s). "
"Install tirith manually or install cosign for auto-install.", exc)
log("tirith install skipped: cosign artifacts unavailable (%s). "
"Install tirith manually or install cosign for auto-install.", exc)
return None, "cosign_artifacts_unavailable"
# Check cosign availability before attempting verification so we can
# distinguish "not installed" (retryable) from "installed but broken."
if not shutil.which("cosign"):
logger.warning("tirith install skipped: cosign not found on PATH. "
"Install cosign for auto-install, or install tirith manually.")
log("tirith install skipped: cosign not found on PATH. "
"Install cosign for auto-install, or install tirith manually.")
return None, "cosign_missing"
cosign_result = _verify_cosign(checksums_path, sig_path, cert_path)
if cosign_result is not True:
# False = verification rejected, None = execution failure (timeout/OSError)
if cosign_result is None:
logger.warning("tirith install aborted: cosign execution failed")
log("tirith install aborted: cosign execution failed")
return None, "cosign_exec_failed"
else:
logger.warning("tirith install aborted: cosign provenance verification failed")
log("tirith install aborted: cosign provenance verification failed")
return None, "cosign_verification_failed"
if not _verify_checksum(archive_path, checksums_path, archive_name):
@@ -354,7 +356,7 @@ def _install_tirith() -> tuple[str | None, str]:
tar.extract(member, tmpdir)
break
else:
logger.warning("tirith binary not found in archive")
log("tirith binary not found in archive")
return None, "binary_not_in_archive"
src = os.path.join(tmpdir, "tirith")
@@ -473,7 +475,7 @@ def _resolve_tirith_path(configured_path: str) -> str:
return expanded
def _background_install():
def _background_install(*, log_failures: bool = True):
"""Background thread target: download and install tirith."""
global _resolved_path, _install_failure_reason
with _install_lock:
@@ -494,7 +496,7 @@ def _background_install():
_install_failure_reason = ""
return
installed, reason = _install_tirith()
installed, reason = _install_tirith(log_failures=log_failures)
if installed:
_resolved_path = installed
_install_failure_reason = ""
@@ -505,7 +507,7 @@ def _background_install():
_mark_install_failed(reason)
def ensure_installed():
def ensure_installed(*, log_failures: bool = True):
"""Ensure tirith is available, downloading in background if needed.
Quick PATH/local checks are synchronous; network download runs in a
@@ -578,7 +580,10 @@ def ensure_installed():
# Need to download — launch background thread so startup doesn't block
if _install_thread is None or not _install_thread.is_alive():
_install_thread = threading.Thread(
target=_background_install, daemon=True)
target=_background_install,
kwargs={"log_failures": log_failures},
daemon=True,
)
_install_thread.start()
return None # Not available yet; commands will fail-open until ready

View File

@@ -50,6 +50,8 @@ def atomic_json_write(
os.fsync(f.fileno())
os.replace(tmp_path, path)
except BaseException:
# Intentionally catch BaseException so temp-file cleanup still runs for
# KeyboardInterrupt/SystemExit before re-raising the original signal.
try:
os.unlink(tmp_path)
except OSError:
@@ -96,6 +98,8 @@ def atomic_yaml_write(
os.fsync(f.fileno())
os.replace(tmp_path, path)
except BaseException:
# Match atomic_json_write: cleanup must also happen for process-level
# interruptions before we re-raise them.
try:
os.unlink(tmp_path)
except OSError: