Compare commits

...

3 Commits

Author SHA1 Message Date
teknium1
1a101d6652 chore(release): map islam666 for salvaged PR #39244 2026-06-07 06:43:44 -07:00
teknium1
3b4b9d3246 fix(cron): floor ticker sleep at 1s to prevent busy-spin + drop dead import
Hardening on salvaged #39244: _next_run_sleep_seconds returned 0 for a
past-due .next_run hint, which tight-loops the ticker at 100% CPU when a
paused-but-enabled job's next_run_at is in the past. Floor to 1s (cron_tick
is idempotent). Also removed the unused 'from pathlib import Path' import.
Adds direct coverage for the floor / shorten / cap / missing-hint paths.
2026-06-07 06:43:44 -07:00
islam666
558ee10348 fix(cron): wake ticker immediately when jobs are created/updated externally
Problem: When a cron job is created via  (or the cronjob
tool), the job is written to jobs.json on disk, but the gateway's cron ticker
only re-reads this file every 60 seconds. This means newly created jobs with
near-future next_run_at might not fire at the expected time — or might be
missed entirely if the next_run_at falls within the current 60-second window.

Changes:
- cron/jobs.py: Added _write_next_run_hint() that writes the earliest
  next_run_at among enabled jobs to a .next_run hint file alongside jobs.json.
  Called from save_jobs() after every write.
- gateway/run.py: Added _next_run_sleep_seconds() that reads the .next_run
  hint file and returns the appropriate sleep duration. Modified
  _start_cron_ticker() to use this instead of a fixed interval, so the ticker
  wakes up in time for the next due job.
- tests/cron/test_jobs.py: Added TestNextRunHint class with 5 tests covering
  hint file creation, disabled job filtering, missing next_run_at, removal
  when no jobs enabled, and end-to-end via create_job.

Fixes #39215
2026-06-07 06:42:36 -07:00
5 changed files with 202 additions and 1 deletions

View File

@@ -482,6 +482,10 @@ def save_jobs(jobs: List[Dict[str, Any]]):
os.fsync(f.fileno())
atomic_replace(tmp_path, JOBS_FILE)
_secure_file(JOBS_FILE)
# Write the earliest next_run_at so the ticker can wake up in time
# for newly created/updated jobs instead of waiting for the next
# full interval tick. See #39215.
_write_next_run_hint(jobs)
except BaseException:
try:
os.unlink(tmp_path)
@@ -490,6 +494,34 @@ def save_jobs(jobs: List[Dict[str, Any]]):
raise
def _write_next_run_hint(jobs: List[Dict[str, Any]]) -> None:
"""Write the earliest next_run_at among enabled jobs to a hint file.
The cron ticker reads this file after each tick and adjusts its sleep
duration so it wakes up in time for the next due job. Without this,
a job created via ``hermes cron create`` (or the cronjob tool) with a
near-future ``next_run_at`` might not fire for up to 60 seconds.
"""
next_run = None
for job in jobs:
if not job.get("enabled", True):
continue
nra = job.get("next_run_at")
if nra is None:
continue
if next_run is None or nra < next_run:
next_run = nra
hint_file = JOBS_FILE.parent / ".next_run"
try:
if next_run is not None:
hint_file.parent.mkdir(parents=True, exist_ok=True)
hint_file.write_text(next_run, encoding="utf-8")
elif hint_file.exists():
hint_file.unlink()
except OSError:
pass # best-effort; ticker falls back to normal interval
def _normalize_workdir(workdir: Optional[str]) -> Optional[str]:
"""Normalize and validate a cron job workdir.

View File

@@ -19538,6 +19538,35 @@ def _run_planned_stop_watcher(
stop_event.wait(poll_interval)
def _next_run_sleep_seconds(interval: int) -> int:
"""Return how long the cron ticker should sleep before the next tick.
Reads the .next_run hint file written by save_jobs(). If the earliest
enabled job's ``next_run_at`` is sooner than ``interval`` seconds from
now, sleep only until that time so the job fires close to its scheduled
moment. Falls back to ``interval`` if the hint file is missing or
unreadable.
"""
hint_file = get_hermes_home() / "cron" / ".next_run"
try:
text = hint_file.read_text(encoding="utf-8").strip()
if not text:
return interval
next_run = datetime.fromisoformat(text)
now = datetime.now(tz=next_run.tzinfo) if next_run.tzinfo else datetime.utcnow()
delta = (next_run - now).total_seconds()
if delta <= 0:
# Job is due (or the hint is stale/past-due — e.g. a paused job
# whose next_run_at is in the past). Return 1, not 0: a 0-second
# sleep would tight-loop the ticker at 100% CPU until the hint is
# rewritten. cron_tick() is idempotent, so a 1s floor still fires
# a genuinely-due job promptly while bounding the spin.
return 1
return max(1, min(int(delta) + 1, interval)) # 1s floor, cap at interval
except Exception:
return interval
def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, interval: int = 60):
"""
Background thread that ticks the cron scheduler at a regular interval.
@@ -19629,7 +19658,12 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, in
except Exception as e:
logger.debug("Curator tick error: %s", e)
stop_event.wait(timeout=interval)
# Sleep until the next tick, but wake up early if a new job was
# created with a near-future next_run_at. The .next_run hint file
# is written by save_jobs() and contains the earliest next_run_at
# among enabled jobs. See #39215.
sleep = _next_run_sleep_seconds(interval)
stop_event.wait(timeout=sleep)
logger.info("Cron ticker stopped")

View File

@@ -58,6 +58,7 @@ AUTHOR_MAP = {
"129007007+HeLLGURD@users.noreply.github.com": "HeLLGURD",
"290859878+synapsesx@users.noreply.github.com": "synapsesx",
"dirtyren@users.noreply.github.com": "dirtyren",
"islam666@users.noreply.github.com": "islam666",
"zhaolei.vc@bytedance.com": "zhaoleibd",
"jeffrobodie@gmail.com": "jeffrobodie-glitch",
"kyssta-exe@users.noreply.github.com": "kyssta-exe",

View File

@@ -991,3 +991,81 @@ class TestSaveJobOutput:
with pytest.raises(ValueError, match="output path"):
save_job_output(str(tmp_cron_dir / "outside"), "# Results")
assert not (tmp_cron_dir / "outside").exists()
class TestNextRunHint:
"""Tests for the .next_run hint file written by save_jobs()."""
def test_write_next_run_hint_creates_file(self, tmp_cron_dir):
"""save_jobs() writes the earliest next_run_at to .next_run."""
from cron.jobs import _write_next_run_hint, JOBS_FILE
from datetime import datetime, timezone, timedelta
now = datetime.now(tz=timezone.utc)
jobs = [
{
"id": "job1",
"enabled": True,
"next_run_at": (now + timedelta(minutes=5)).isoformat(),
},
{
"id": "job2",
"enabled": True,
"next_run_at": (now + timedelta(minutes=2)).isoformat(),
},
]
_write_next_run_hint(jobs)
hint_file = JOBS_FILE.parent / ".next_run"
assert hint_file.is_file()
content = hint_file.read_text(encoding="utf-8").strip()
assert content == jobs[1]["next_run_at"] # earliest
def test_write_next_run_hint_skips_disabled(self, tmp_cron_dir):
"""Disabled jobs are ignored when computing the hint."""
from cron.jobs import _write_next_run_hint
jobs = [
{
"id": "disabled-job",
"enabled": False,
"next_run_at": "2026-01-01T00:00:00",
},
{
"id": "enabled-job",
"enabled": True,
"next_run_at": "2027-06-01T09:00:00",
},
]
_write_next_run_hint(jobs)
hint_file = tmp_cron_dir / "cron" / ".next_run"
assert hint_file.read_text(encoding="utf-8").strip() == "2027-06-01T09:00:00"
def test_write_next_run_hint_no_enabled_jobs(self, tmp_cron_dir):
"""When no jobs are enabled, the hint file is removed."""
from cron.jobs import _write_next_run_hint, JOBS_FILE
hint_file = JOBS_FILE.parent / ".next_run"
hint_file.parent.mkdir(parents=True, exist_ok=True)
hint_file.write_text("old-value", encoding="utf-8")
_write_next_run_hint([])
assert not hint_file.exists()
def test_write_next_run_hint_missing_next_run_at(self, tmp_cron_dir):
"""Jobs without next_run_at are skipped."""
from cron.jobs import _write_next_run_hint
jobs = [{"id": "job1", "enabled": True, "next_run_at": None}]
_write_next_run_hint(jobs)
hint_file = tmp_cron_dir / "cron" / ".next_run"
assert not hint_file.exists()
# No crash when saving with no valid next_run_at values
def test_save_jobs_writes_hint(self, tmp_cron_dir):
"""create_job → save_jobs also writes the .next_run hint."""
job = create_job(prompt="Test", schedule="every 1m")
hint_file = tmp_cron_dir / "cron" / ".next_run"
assert hint_file.is_file()
content = hint_file.read_text(encoding="utf-8").strip()
# Should contain a valid ISO timestamp
parsed = datetime.fromisoformat(content)
assert parsed.tzinfo is not None or True # may or may not have timezone

View File

@@ -0,0 +1,56 @@
"""Regression tests for _next_run_sleep_seconds busy-spin floor (salvaged #39244).
The cron ticker shortens its sleep based on the .next_run hint. A past-due
hint (e.g. a paused-but-enabled job whose next_run_at is in the past) must
NOT yield a 0-second sleep — that tight-loops the ticker at 100% CPU. The
floor is 1 second.
"""
from datetime import datetime, timedelta, timezone
import pytest
import gateway.run as gw
@pytest.fixture
def hint(tmp_path, monkeypatch):
home = tmp_path
(home / "cron").mkdir()
monkeypatch.setattr(gw, "get_hermes_home", lambda: home)
def write(dt):
(home / "cron" / ".next_run").write_text(dt.isoformat(), encoding="utf-8")
return write
def test_past_due_hint_floors_to_one_second(hint):
"""A hint in the past must return 1, never 0 (no busy-spin)."""
hint(datetime.now(timezone.utc) - timedelta(minutes=5))
assert gw._next_run_sleep_seconds(60) == 1
def test_imminent_hint_floors_to_one_second(hint):
"""A sub-second-away hint still returns at least 1."""
hint(datetime.now(timezone.utc) + timedelta(milliseconds=200))
assert gw._next_run_sleep_seconds(60) >= 1
def test_soon_hint_shortens_sleep(hint):
"""A hint ~10s out yields a short sleep, capped below interval."""
hint(datetime.now(timezone.utc) + timedelta(seconds=10))
s = gw._next_run_sleep_seconds(60)
assert 1 <= s <= 12
def test_far_hint_caps_at_interval(hint):
"""A hint far out is capped at the normal interval."""
hint(datetime.now(timezone.utc) + timedelta(hours=2))
assert gw._next_run_sleep_seconds(60) == 60
def test_missing_hint_returns_interval(tmp_path, monkeypatch):
monkeypatch.setattr(gw, "get_hermes_home", lambda: tmp_path)
(tmp_path / "cron").mkdir()
assert gw._next_run_sleep_seconds(60) == 60