mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
fix(gateway): self-request service restarts when invoked in-process
This commit is contained in:
@@ -100,6 +100,59 @@ def _get_service_pids() -> set:
|
||||
return pids
|
||||
|
||||
|
||||
def _get_parent_pid(pid: int) -> int | None:
|
||||
"""Return the parent PID for ``pid``, or ``None`` when unavailable."""
|
||||
if pid <= 1:
|
||||
return None
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["ps", "-o", "ppid=", "-p", str(pid)],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired):
|
||||
return None
|
||||
if result.returncode != 0:
|
||||
return None
|
||||
raw = result.stdout.strip()
|
||||
if not raw:
|
||||
return None
|
||||
try:
|
||||
parent_pid = int(raw.splitlines()[-1].strip())
|
||||
except ValueError:
|
||||
return None
|
||||
return parent_pid if parent_pid > 0 else None
|
||||
|
||||
|
||||
def _is_pid_ancestor_of_current_process(target_pid: int) -> bool:
    """Tell whether ``target_pid`` is this process or sits above it in the process tree.

    Starting from ``os.getpid()``, the parent chain is followed one step at
    a time via ``_get_parent_pid`` until the target is found, the chain
    ends (no parent could be resolved), or a PID repeats.

    Args:
        target_pid: PID to look for. Non-positive values never match.

    Returns:
        ``True`` when ``target_pid`` is reached during the walk, else ``False``.
    """
    if target_pid <= 0:
        return False

    visited: set[int] = set()
    current = os.getpid()
    while True:
        # A falsy PID means the parent lookup failed; a repeated PID means
        # the chain looped back on itself. Either way, stop searching.
        if not current or current in visited:
            return False
        if current == target_pid:
            return True
        visited.add(current)
        current = _get_parent_pid(current) or 0
|
||||
|
||||
|
||||
def _request_gateway_self_restart(pid: int) -> bool:
    """Send ``SIGUSR1`` to ``pid`` when it is this process or one of its ancestors.

    NOTE(review): presumably the gateway installs a ``SIGUSR1`` handler that
    performs the actual restart; that handler is not visible here.

    Args:
        pid: PID of the running gateway process.

    Returns:
        ``True`` when the signal was sent without error. ``False`` when the
        platform has no ``SIGUSR1``, when ``pid`` is not the current process
        or an ancestor of it, or when ``os.kill`` raises ``OSError``.
    """
    # The platform check comes first so the ancestry walk is skipped
    # entirely where SIGUSR1 does not exist.
    eligible = hasattr(signal, "SIGUSR1") and _is_pid_ancestor_of_current_process(pid)
    if not eligible:
        return False

    try:
        os.kill(pid, signal.SIGUSR1)
    except OSError:
        # ProcessLookupError and PermissionError are OSError subclasses.
        return False
    else:
        return True
|
||||
|
||||
|
||||
def find_gateway_pids(exclude_pids: set | None = None) -> list:
|
||||
"""Find PIDs of running gateway processes.
|
||||
|
||||
@@ -971,6 +1024,12 @@ def systemd_restart(system: bool = False):
|
||||
if system:
|
||||
_require_root_for_system_service("restart")
|
||||
refresh_systemd_unit_if_needed(system=system)
|
||||
from gateway.status import get_running_pid
|
||||
|
||||
pid = get_running_pid()
|
||||
if pid is not None and _request_gateway_self_restart(pid):
|
||||
print(f"✓ {_service_scope_label(system).capitalize()} service restart requested")
|
||||
return
|
||||
subprocess.run(_systemctl_cmd(system) + ["reload-or-restart", get_service_name()], check=True, timeout=90)
|
||||
print(f"✓ {_service_scope_label(system).capitalize()} service restarted")
|
||||
|
||||
@@ -1309,6 +1368,9 @@ def launchd_restart():
|
||||
|
||||
try:
|
||||
pid = get_running_pid()
|
||||
if pid is not None and _request_gateway_self_restart(pid):
|
||||
print("✓ Service restart requested")
|
||||
return
|
||||
if pid is not None:
|
||||
try:
|
||||
terminate_pid(pid, force=False)
|
||||
|
||||
@@ -243,6 +243,7 @@ class TestLaunchdServiceRecovery:
|
||||
target = f"{gateway_cli._launchd_domain()}/{gateway_cli.get_launchd_label()}"
|
||||
|
||||
monkeypatch.setattr(gateway_cli, "_get_restart_drain_timeout", lambda: 12.0)
|
||||
monkeypatch.setattr(gateway_cli, "_request_gateway_self_restart", lambda pid: False)
|
||||
monkeypatch.setattr(gateway_cli, "_wait_for_gateway_exit", lambda timeout, force_after=None: True)
|
||||
monkeypatch.setattr(gateway_cli, "terminate_pid", lambda pid, force=False: calls.append(("term", pid, force)))
|
||||
monkeypatch.setattr(
|
||||
@@ -263,6 +264,29 @@ class TestLaunchdServiceRecovery:
|
||||
["launchctl", "kickstart", "-k", target],
|
||||
]
|
||||
|
||||
def test_launchd_restart_self_requests_graceful_restart_without_kickstart(self, monkeypatch, capsys):
    """launchd_restart must return after a successful self-restart request.

    The running PID is stubbed to 321 and the self-restart helper reports
    success, so any ``subprocess.run`` call fails the test.
    """
    recorded = []

    def accept_self_restart(pid):
        recorded.append(("self", pid))
        return True

    def forbid_subprocess(*args, **kwargs):
        raise AssertionError("launchctl should not run")

    monkeypatch.setattr("gateway.status.get_running_pid", lambda: 321)
    monkeypatch.setattr(gateway_cli, "_request_gateway_self_restart", accept_self_restart)
    monkeypatch.setattr(gateway_cli.subprocess, "run", forbid_subprocess)

    gateway_cli.launchd_restart()

    assert recorded == [("self", 321)]
    printed = capsys.readouterr().out
    assert "restart requested" in printed.lower()
|
||||
|
||||
def test_launchd_stop_uses_bootout_not_kill(self, monkeypatch):
|
||||
"""launchd_stop must bootout the service so KeepAlive doesn't respawn it."""
|
||||
label = gateway_cli.get_launchd_label()
|
||||
@@ -366,6 +390,31 @@ class TestGatewayServiceDetection:
|
||||
|
||||
|
||||
class TestGatewaySystemServiceRouting:
|
||||
def test_systemd_restart_self_requests_graceful_restart_without_reload_or_restart(self, monkeypatch, capsys):
    """systemd_restart must refresh the unit, request a self-restart, and stop.

    The running PID is stubbed to 654 and the self-restart helper reports
    success, so any ``subprocess.run`` call fails the test.
    """
    recorded = []

    def record_refresh(system=False):
        recorded.append(("refresh", system))

    def accept_self_restart(pid):
        recorded.append(("self", pid))
        return True

    def forbid_subprocess(*args, **kwargs):
        raise AssertionError("systemctl should not run")

    monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
    monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", record_refresh)
    monkeypatch.setattr("gateway.status.get_running_pid", lambda: 654)
    monkeypatch.setattr(gateway_cli, "_request_gateway_self_restart", accept_self_restart)
    monkeypatch.setattr(gateway_cli.subprocess, "run", forbid_subprocess)

    gateway_cli.systemd_restart()

    # The unit refresh must happen before the self-restart request.
    assert recorded == [("refresh", False), ("self", 654)]
    printed = capsys.readouterr().out
    assert "restart requested" in printed.lower()
|
||||
|
||||
def test_gateway_install_passes_system_flags(self, monkeypatch):
|
||||
monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
|
||||
monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
|
||||
|
||||
Reference in New Issue
Block a user