diff --git a/cron/jobs.py b/cron/jobs.py index c9a41ca2f5..6c0a2405b2 100644 --- a/cron/jobs.py +++ b/cron/jobs.py @@ -311,6 +311,12 @@ def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None elif schedule["kind"] == "cron": if not HAS_CRONITER: + logger.warning( + "Cannot compute next run for cron schedule %r: 'croniter' " + "is not installed. Install the 'cron' extra (pip install " + "'hermes-agent[cron]') to re-enable recurring cron jobs.", + schedule.get("expr"), + ) return None cron = croniter(schedule["expr"], now) next_run = cron.get_next(datetime) @@ -698,10 +704,32 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None, # Compute next run job["next_run_at"] = compute_next_run(job["schedule"], now) - # If no next run (one-shot completed), disable + # If no next run, decide whether this is terminal completion + # (one-shot) or a transient failure (recurring schedule couldn't + # compute — e.g. 'croniter' missing from the runtime env). + # Recurring jobs must NEVER be silently disabled: that turns a + # missing runtime dep into "job completed" and the user's + # schedule quietly goes off. See issue #16265. if job["next_run_at"] is None: - job["enabled"] = False - job["state"] = "completed" + kind = job.get("schedule", {}).get("kind") + if kind in ("cron", "interval"): + job["state"] = "error" + if not job.get("last_error"): + job["last_error"] = ( + "Failed to compute next run for recurring " + "schedule (is the 'croniter' package " + "installed in the gateway's Python env?)" + ) + logger.error( + "Job '%s' (%s) could not compute next_run_at; " + "leaving enabled and marking state=error so the " + "job is not silently disabled.", + job.get("name", job["id"]), + kind, + ) + else: + job["enabled"] = False + job["state"] = "completed" elif job.get("state") != "paused": job["state"] = "scheduled" diff --git a/tests/cron/test_jobs.py b/tests/cron/test_jobs.py index 6a9185f072..30bd6b41d5 100644 --- a/tests/cron/test_jobs.py +++ b/tests/cron/test_jobs.py @@ -369,6 +369,88 @@ class TestMarkJobRun: assert updated["last_error"] == "model timeout" assert updated["last_delivery_error"] == "platform 'discord' not enabled" + def test_recurring_cron_not_disabled_when_croniter_missing(self, tmp_cron_dir, monkeypatch): + """Regression test for issue #16265. + + If the gateway runs in an env where `croniter` went missing after a + recurring cron job was persisted, `compute_next_run()` returns None. + `mark_job_run()` must NOT treat that as terminal completion — the job + has to stay enabled with state=error so the user notices, rather than + silently flipping to enabled=false, state=completed. + """ + pytest.importorskip("croniter") # need it to create the job + job = create_job(prompt="Recurring", schedule="0 7,15,23 * * *") + assert job["schedule"]["kind"] == "cron" + + # Simulate the runtime env having lost croniter between job creation + # and this run. + monkeypatch.setattr("cron.jobs.HAS_CRONITER", False) + + mark_job_run(job["id"], success=True) + + updated = get_job(job["id"]) + assert updated is not None, "recurring cron job was deleted" + assert updated["enabled"] is True, ( + "recurring cron job was disabled despite croniter-missing being " + "a runtime dep issue, not a terminal completion" + ) + assert updated["state"] == "error" + assert updated["state"] != "completed" + assert updated["next_run_at"] is None + assert updated["last_error"] + assert "croniter" in updated["last_error"].lower() + + def test_recurring_interval_not_disabled_when_next_run_is_none(self, tmp_cron_dir, monkeypatch): + """Defensive sibling of the cron test — any recurring schedule that + somehow yields next_run_at=None must stay enabled with state=error. + """ + job = create_job(prompt="Recurring", schedule="every 1h") + assert job["schedule"]["kind"] == "interval" + + # Force compute_next_run to return None for this call — simulates + # any future regression where a recurring schedule loses its + # next-run computation (missing dep, corrupt schedule, etc.). + monkeypatch.setattr("cron.jobs.compute_next_run", lambda *a, **kw: None) + + mark_job_run(job["id"], success=True) + + updated = get_job(job["id"]) + assert updated is not None + assert updated["enabled"] is True + assert updated["state"] == "error" + assert updated["state"] != "completed" + + def test_oneshot_still_completes_when_next_run_is_none(self, tmp_cron_dir): + """One-shot jobs must still flip to enabled=false, state=completed + when next_run_at cannot be computed — the #16265 fix must not + regress this path. We bypass create_job and craft a minimal + one-shot record directly so that the repeat-limit branch doesn't + pop the job before we observe the terminal-completion branch. + """ + jobs = [{ + "id": "oneshot-test", + "prompt": "Once", + "schedule": {"kind": "once", "run_at": "2020-01-01T00:00:00+00:00", "display": "once"}, + "repeat": {"times": None, "completed": 0}, + "enabled": True, + "state": "scheduled", + "next_run_at": "2020-01-01T00:00:00+00:00", + "last_run_at": None, + "last_status": None, + "last_error": None, + "last_delivery_error": None, + "created_at": "2020-01-01T00:00:00+00:00", + }] + save_jobs(jobs) + + mark_job_run("oneshot-test", success=True) + + updated = get_job("oneshot-test") + assert updated is not None + assert updated["next_run_at"] is None + assert updated["enabled"] is False + assert updated["state"] == "completed" + class TestAdvanceNextRun: """Tests for advance_next_run() — crash-safety for recurring jobs."""