mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
refactor: extract atomic_json_write helper, add 24 checkpoint tests
Extract the duplicated temp-file + fsync + os.replace pattern from batch_runner.py (1 instance) and process_registry.py (2 instances) into a shared utils.atomic_json_write() function.

Add 12 tests for atomic_json_write covering: valid JSON, parent dir creation, overwrite, crash safety (original preserved on error), no temp file leaks, string paths, unicode, custom indent, concurrent writes.

Add 12 tests for batch_runner checkpoint behavior covering: _save_checkpoint (valid JSON, last_updated, overwrite, lock/no-lock, parent dirs, no temp leaks), _load_checkpoint (missing file, existing data, corrupt JSON), and resume logic (preserves prior progress, different run_name starts fresh).
This commit is contained in:
@@ -37,7 +37,6 @@ import shlex
|
||||
import shutil
|
||||
import signal
|
||||
import subprocess
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
@@ -707,25 +706,9 @@ class ProcessRegistry:
|
||||
"session_key": s.session_key,
|
||||
})
|
||||
|
||||
# Atomic write: temp file + os.replace to avoid corruption on crash
|
||||
CHECKPOINT_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
fd, tmp_path = tempfile.mkstemp(
|
||||
dir=str(CHECKPOINT_PATH.parent),
|
||||
prefix='.checkpoint_',
|
||||
suffix='.tmp',
|
||||
)
|
||||
try:
|
||||
with os.fdopen(fd, 'w', encoding='utf-8') as f:
|
||||
json.dump(entries, f, indent=2, ensure_ascii=False)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp_path, CHECKPOINT_PATH)
|
||||
except BaseException:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
# Atomic write to avoid corruption on crash
|
||||
from utils import atomic_json_write
|
||||
atomic_json_write(CHECKPOINT_PATH, entries)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to write checkpoint file: %s", e, exc_info=True)
|
||||
|
||||
@@ -774,26 +757,9 @@ class ProcessRegistry:
|
||||
logger.info("Recovered detached process: %s (pid=%d)", session.command[:60], pid)
|
||||
|
||||
# Clear the checkpoint (will be rewritten as processes finish)
|
||||
# Use atomic write to avoid corruption
|
||||
try:
|
||||
CHECKPOINT_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
fd, tmp_path = tempfile.mkstemp(
|
||||
dir=str(CHECKPOINT_PATH.parent),
|
||||
prefix='.checkpoint_',
|
||||
suffix='.tmp',
|
||||
)
|
||||
try:
|
||||
with os.fdopen(fd, 'w', encoding='utf-8') as f:
|
||||
f.write("[]")
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp_path, CHECKPOINT_PATH)
|
||||
except BaseException:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
from utils import atomic_json_write
|
||||
atomic_json_write(CHECKPOINT_PATH, [])
|
||||
except Exception as e:
|
||||
logger.debug("Could not clear checkpoint file: %s", e, exc_info=True)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user