fix(gateway): clear stale runtime error fields after platform recovery

Cherry-picked from PR #2470 by @NaesayerX.

write_runtime_status used None as 'don't change', making it impossible
to clear stale error_code/error_message after platform recovery. It now
uses a sentinel (_UNSET) to mean 'don't change', so None means 'clear
this field'. Passing error_code=None or error_message=None removes the
key from the platform payload instead of leaving stale error metadata
in gateway_state.json.
This commit is contained in:
Teknium
2026-03-22 09:18:24 -07:00
parent 72a6d7dffe
commit 3e5ca0bc13
2 changed files with 53 additions and 13 deletions

View File

@@ -103,6 +103,34 @@ class TestGatewayRuntimeStatus:
assert payload["platforms"]["telegram"]["error_code"] == "telegram_polling_conflict"
assert payload["platforms"]["telegram"]["error_message"] == "another poller is active"
def test_write_runtime_status_clears_stale_platform_error_fields_on_recovery(self, tmp_path, monkeypatch):
    """Writing None for the error fields on recovery removes them from the platform entry.

    A fatal Telegram status carrying error metadata is written first, then a
    "connected" status with ``error_code=None`` and ``error_message=None``.
    The status read back must report the recovered state and must no longer
    contain either error key for the platform.
    """
    # Point HERMES_HOME at the per-test temporary directory.
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    # Step 1: record a startup failure that carries error metadata.
    failed_write = {
        "gateway_state": "startup_failed",
        "exit_reason": "telegram conflict",
        "platform": "telegram",
        "platform_state": "fatal",
        "error_code": "telegram_token_lock",
        "error_message": "another local Hermes gateway is already using this Telegram bot token",
    }
    status.write_runtime_status(**failed_write)

    # Step 2: record recovery, explicitly passing None for every error field.
    recovered_write = {
        "gateway_state": "running",
        "exit_reason": None,
        "platform": "telegram",
        "platform_state": "connected",
        "error_code": None,
        "error_message": None,
    }
    status.write_runtime_status(**recovered_write)

    snapshot = status.read_runtime_status()
    assert snapshot["gateway_state"] == "running"
    assert snapshot["exit_reason"] is None

    telegram_entry = snapshot["platforms"]["telegram"]
    assert telegram_entry["state"] == "connected"
    # The stale keys must be gone entirely, not merely set to None.
    for stale_key in ("error_code", "error_message"):
        assert stale_key not in telegram_entry
class TestScopedLocks:
def test_acquire_scoped_lock_rejects_live_other_process(self, tmp_path, monkeypatch):