Compare commits

...

2 Commits

Author SHA1 Message Date
Teknium
4eef50022e fix: add all_profiles param + narrow exception handling
- add all_profiles=False to find_gateway_pids() and
  kill_gateway_processes() so hermes update and gateway stop --all
  can still discover processes across all profiles
- narrow bare 'except Exception' to (OSError, subprocess.TimeoutExpired)
- update test mocks to match new signatures
2026-04-11 14:30:29 -07:00
Dominic Grieco
1c19184fbf fix: scope gateway status to the active profile 2026-04-11 14:28:49 -07:00
5 changed files with 158 additions and 32 deletions

View File

@@ -157,30 +157,54 @@ def _request_gateway_self_restart(pid: int) -> bool:
return True return True
def find_gateway_pids(exclude_pids: set | None = None) -> list: def find_gateway_pids(exclude_pids: set | None = None, all_profiles: bool = False) -> list:
"""Find PIDs of running gateway processes. """Find PIDs of running gateway processes.
Args: Args:
exclude_pids: PIDs to exclude from the result (e.g. service-managed exclude_pids: PIDs to exclude from the result (e.g. service-managed
PIDs that should not be killed during a stale-process sweep). PIDs that should not be killed during a stale-process sweep).
all_profiles: When ``True``, return gateway PIDs across **all**
profiles (the pre-7923 global behaviour). ``hermes update``
needs this because a code update affects every profile.
When ``False`` (default), only PIDs belonging to the current
Hermes profile are returned.
""" """
pids = []
_exclude = exclude_pids or set() _exclude = exclude_pids or set()
pids = [pid for pid in _get_service_pids() if pid not in _exclude]
patterns = [ patterns = [
"hermes_cli.main gateway", "hermes_cli.main gateway",
"hermes_cli.main --profile",
"hermes_cli.main -p",
"hermes_cli/main.py gateway", "hermes_cli/main.py gateway",
"hermes_cli/main.py --profile",
"hermes_cli/main.py -p",
"hermes gateway", "hermes gateway",
"gateway/run.py", "gateway/run.py",
] ]
current_home = str(get_hermes_home().resolve())
current_profile_arg = _profile_arg(current_home)
current_profile_name = current_profile_arg.split()[-1] if current_profile_arg else ""
def _matches_current_profile(command: str) -> bool:
if current_profile_name:
return (
f"--profile {current_profile_name}" in command
or f"-p {current_profile_name}" in command
or f"HERMES_HOME={current_home}" in command
)
if "--profile " in command or " -p " in command:
return False
if "HERMES_HOME=" in command and f"HERMES_HOME={current_home}" not in command:
return False
return True
try: try:
if is_windows(): if is_windows():
# Windows: use wmic to search command lines
result = subprocess.run( result = subprocess.run(
["wmic", "process", "get", "ProcessId,CommandLine", "/FORMAT:LIST"], ["wmic", "process", "get", "ProcessId,CommandLine", "/FORMAT:LIST"],
capture_output=True, text=True, timeout=10 capture_output=True, text=True, timeout=10
) )
# Parse WMIC LIST output: blocks of "CommandLine=...\nProcessId=...\n"
current_cmd = "" current_cmd = ""
for line in result.stdout.split('\n'): for line in result.stdout.split('\n'):
line = line.strip() line = line.strip()
@@ -188,7 +212,7 @@ def find_gateway_pids(exclude_pids: set | None = None) -> list:
current_cmd = line[len("CommandLine="):] current_cmd = line[len("CommandLine="):]
elif line.startswith("ProcessId="): elif line.startswith("ProcessId="):
pid_str = line[len("ProcessId="):] pid_str = line[len("ProcessId="):]
if any(p in current_cmd for p in patterns): if any(p in current_cmd for p in patterns) and (all_profiles or _matches_current_profile(current_cmd)):
try: try:
pid = int(pid_str) pid = int(pid_str)
if pid != os.getpid() and pid not in pids and pid not in _exclude: if pid != os.getpid() and pid not in pids and pid not in _exclude:
@@ -198,41 +222,57 @@ def find_gateway_pids(exclude_pids: set | None = None) -> list:
current_cmd = "" current_cmd = ""
else: else:
result = subprocess.run( result = subprocess.run(
["ps", "aux"], ["ps", "eww", "-ax", "-o", "pid=,command="],
capture_output=True, capture_output=True,
text=True, text=True,
timeout=10, timeout=10,
) )
for line in result.stdout.split('\n'): for line in result.stdout.split('\n'):
# Skip grep and current process stripped = line.strip()
if 'grep' in line or str(os.getpid()) in line: if not stripped or 'grep' in stripped:
continue continue
for pattern in patterns:
if pattern in line: pid = None
parts = line.split() command = ""
if len(parts) > 1:
try: parts = stripped.split(None, 1)
pid = int(parts[1]) if len(parts) == 2:
if pid not in pids and pid not in _exclude: try:
pids.append(pid) pid = int(parts[0])
except ValueError: command = parts[1]
continue except ValueError:
break pid = None
except Exception:
if pid is None:
aux_parts = stripped.split()
if len(aux_parts) > 10 and aux_parts[1].isdigit():
pid = int(aux_parts[1])
command = " ".join(aux_parts[10:])
if pid is None:
continue
if pid == os.getpid() or pid in pids or pid in _exclude:
continue
if any(pattern in command for pattern in patterns) and (all_profiles or _matches_current_profile(command)):
pids.append(pid)
except (OSError, subprocess.TimeoutExpired):
pass pass
return pids return pids
def kill_gateway_processes(force: bool = False, exclude_pids: set | None = None) -> int: def kill_gateway_processes(force: bool = False, exclude_pids: set | None = None,
all_profiles: bool = False) -> int:
"""Kill any running gateway processes. Returns count killed. """Kill any running gateway processes. Returns count killed.
Args: Args:
force: Use the platform's force-kill mechanism instead of graceful terminate. force: Use the platform's force-kill mechanism instead of graceful terminate.
exclude_pids: PIDs to skip (e.g. service-managed PIDs that were just exclude_pids: PIDs to skip (e.g. service-managed PIDs that were just
restarted and should not be killed). restarted and should not be killed).
all_profiles: When ``True``, kill across all profiles. Passed
through to :func:`find_gateway_pids`.
""" """
pids = find_gateway_pids(exclude_pids=exclude_pids) pids = find_gateway_pids(exclude_pids=exclude_pids, all_profiles=all_profiles)
killed = 0 killed = 0
for pid in pids: for pid in pids:
@@ -633,6 +673,17 @@ def print_systemd_linger_guidance() -> None:
print(" If you want the gateway user service to survive logout, run:") print(" If you want the gateway user service to survive logout, run:")
print(" sudo loginctl enable-linger $USER") print(" sudo loginctl enable-linger $USER")
def _launchd_user_home() -> Path:
    """Return the real macOS user home for launchd artifacts.

    Profile-mode Hermes often sets ``HOME`` to a profile-scoped directory, but
    launchd user agents still live under the actual account home, so the
    passwd database is consulted instead of ``Path.home()``.

    Returns:
        The ``pw_dir`` of the current uid's passwd entry. If the uid has no
        passwd entry, falls back to ``Path.home()`` rather than raising.
    """
    # Imported locally, as in the original; ``pwd`` is a Unix-only module.
    import pwd

    try:
        return Path(pwd.getpwuid(os.getuid()).pw_dir)
    except KeyError:
        # ``getpwuid`` raises KeyError when the uid is unknown to the passwd
        # database; degrade to the environment-derived home instead of
        # crashing every caller that needs the plist path.
        return Path.home()
def get_launchd_plist_path() -> Path: def get_launchd_plist_path() -> Path:
"""Return the launchd plist path, scoped per profile. """Return the launchd plist path, scoped per profile.
@@ -641,7 +692,7 @@ def get_launchd_plist_path() -> Path:
""" """
suffix = _profile_suffix() suffix = _profile_suffix()
name = f"ai.hermes.gateway-{suffix}" if suffix else "ai.hermes.gateway" name = f"ai.hermes.gateway-{suffix}" if suffix else "ai.hermes.gateway"
return Path.home() / "Library" / "LaunchAgents" / f"{name}.plist" return _launchd_user_home() / "Library" / "LaunchAgents" / f"{name}.plist"
def _detect_venv_dir() -> Path | None: def _detect_venv_dir() -> Path | None:
"""Detect the active virtualenv directory. """Detect the active virtualenv directory.
@@ -839,6 +890,25 @@ def _normalize_service_definition(text: str) -> str:
return "\n".join(line.rstrip() for line in text.strip().splitlines()) return "\n".join(line.rstrip() for line in text.strip().splitlines())
def _normalize_launchd_plist_for_comparison(text: str) -> str:
    """Return *text* in a canonical form for launchd plist staleness checks.

    The generated plist deliberately embeds a broad PATH assembled from the
    invoking shell so user-installed tools stay reachable under launchd.
    Because that value differs from shell to shell, a raw text comparison is
    unstable; the PATH payload is therefore swapped for a fixed placeholder
    after the usual service-definition normalization.
    """
    import re

    # Keep the <key>/<string> wrapper (groups 1 and 3) and blank out only the
    # PATH value itself; re.S lets the value span multiple lines.
    path_entry = re.compile(
        r'(<key>PATH</key>\s*<string>)(.*?)(</string>)',
        flags=re.S,
    )
    canonical = _normalize_service_definition(text)
    return path_entry.sub(r'\1__HERMES_PATH__\3', canonical)
def systemd_unit_is_current(system: bool = False) -> bool: def systemd_unit_is_current(system: bool = False) -> bool:
unit_path = get_systemd_unit_path(system=system) unit_path = get_systemd_unit_path(system=system)
if not unit_path.exists(): if not unit_path.exists():
@@ -1220,7 +1290,7 @@ def launchd_plist_is_current() -> bool:
installed = plist_path.read_text(encoding="utf-8") installed = plist_path.read_text(encoding="utf-8")
expected = generate_launchd_plist() expected = generate_launchd_plist()
return _normalize_service_definition(installed) == _normalize_service_definition(expected) return _normalize_launchd_plist_for_comparison(installed) == _normalize_launchd_plist_for_comparison(expected)
def refresh_launchd_plist_if_needed() -> bool: def refresh_launchd_plist_if_needed() -> bool:
@@ -2540,7 +2610,7 @@ def gateway_command(args):
service_available = True service_available = True
except subprocess.CalledProcessError: except subprocess.CalledProcessError:
pass pass
killed = kill_gateway_processes() killed = kill_gateway_processes(all_profiles=True)
total = killed + (1 if service_available else 0) total = killed + (1 if service_available else 0)
if total: if total:
print(f"✓ Stopped {total} gateway process(es) across all profiles") print(f"✓ Stopped {total} gateway process(es) across all profiles")

View File

@@ -3876,7 +3876,7 @@ def cmd_update(args):
# Exclude PIDs that belong to just-restarted services so we don't # Exclude PIDs that belong to just-restarted services so we don't
# immediately kill the process that systemd/launchd just spawned. # immediately kill the process that systemd/launchd just spawned.
service_pids = _get_service_pids() service_pids = _get_service_pids()
manual_pids = find_gateway_pids(exclude_pids=service_pids) manual_pids = find_gateway_pids(exclude_pids=service_pids, all_profiles=True)
for pid in manual_pids: for pid in manual_pids:
try: try:
os.kill(pid, _signal.SIGTERM) os.kill(pid, _signal.SIGTERM)

View File

@@ -260,7 +260,7 @@ class TestWaitForGatewayExit:
def test_kill_gateway_processes_force_uses_helper(self, monkeypatch): def test_kill_gateway_processes_force_uses_helper(self, monkeypatch):
calls = [] calls = []
monkeypatch.setattr(gateway, "find_gateway_pids", lambda exclude_pids=None: [11, 22]) monkeypatch.setattr(gateway, "find_gateway_pids", lambda exclude_pids=None, all_profiles=False: [11, 22])
monkeypatch.setattr(gateway, "terminate_pid", lambda pid, force=False: calls.append((pid, force))) monkeypatch.setattr(gateway, "terminate_pid", lambda pid, force=False: calls.append((pid, force)))
killed = gateway.kill_gateway_processes(force=True) killed = gateway.kill_gateway_processes(force=True)

View File

@@ -1,6 +1,7 @@
"""Tests for gateway service management helpers.""" """Tests for gateway service management helpers."""
import os import os
import pwd
from pathlib import Path from pathlib import Path
from types import SimpleNamespace from types import SimpleNamespace
@@ -129,7 +130,7 @@ class TestGatewayStopCleanup:
monkeypatch.setattr( monkeypatch.setattr(
gateway_cli, gateway_cli,
"kill_gateway_processes", "kill_gateway_processes",
lambda force=False: kill_calls.append(force) or 2, lambda force=False, all_profiles=False: kill_calls.append(force) or 2,
) )
gateway_cli.gateway_command(SimpleNamespace(gateway_command="stop")) gateway_cli.gateway_command(SimpleNamespace(gateway_command="stop"))
@@ -155,7 +156,7 @@ class TestGatewayStopCleanup:
monkeypatch.setattr( monkeypatch.setattr(
gateway_cli, gateway_cli,
"kill_gateway_processes", "kill_gateway_processes",
lambda force=False: kill_calls.append(force) or 2, lambda force=False, all_profiles=False: kill_calls.append(force) or 2,
) )
gateway_cli.gateway_command(SimpleNamespace(gateway_command="stop", **{"all": True})) gateway_cli.gateway_command(SimpleNamespace(gateway_command="stop", **{"all": True}))
@@ -924,6 +925,23 @@ class TestProfileArg:
assert "<string>--profile</string>" in plist assert "<string>--profile</string>" in plist
assert "<string>mybot</string>" in plist assert "<string>mybot</string>" in plist
def test_launchd_plist_path_uses_real_user_home_not_profile_home(self, tmp_path, monkeypatch):
    """The plist path is rooted at the passwd home, not the patched ``Path.home()``."""
    hermes_profile = tmp_path / ".hermes" / "profiles" / "orcha"
    hermes_profile.mkdir(parents=True)
    scoped_home = hermes_profile / "home"
    scoped_home.mkdir()
    account_home = tmp_path / "machine-home"
    account_home.mkdir()

    # Profile mode: Path.home() and HERMES_HOME both point inside the profile,
    # while the passwd entry still reports the machine-level home.
    monkeypatch.setattr(Path, "home", lambda: scoped_home)
    monkeypatch.setenv("HERMES_HOME", str(hermes_profile))
    monkeypatch.setattr(gateway_cli, "get_hermes_home", lambda: hermes_profile)
    monkeypatch.setattr(pwd, "getpwuid", lambda uid: SimpleNamespace(pw_dir=str(account_home)))

    expected = account_home / "Library" / "LaunchAgents" / "ai.hermes.gateway-orcha.plist"
    assert gateway_cli.get_launchd_plist_path() == expected
class TestRemapPathForUser: class TestRemapPathForUser:
"""Unit tests for _remap_path_for_user().""" """Unit tests for _remap_path_for_user()."""

View File

@@ -191,6 +191,19 @@ class TestLaunchdPlistPath:
raise AssertionError("PATH key not found in plist") raise AssertionError("PATH key not found in plist")
class TestLaunchdPlistCurrentness:
    """Staleness detection for the installed launchd plist."""

    def test_launchd_plist_is_current_ignores_path_drift(self, tmp_path, monkeypatch):
        """A plist written under one PATH is still current under a different PATH."""
        installed_plist = tmp_path / "ai.hermes.gateway.plist"
        monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: installed_plist)

        # Install a plist generated while one shell PATH is active ...
        monkeypatch.setenv("PATH", "/custom/bin:/usr/bin:/bin")
        generated = gateway_cli.generate_launchd_plist()
        installed_plist.write_text(generated, encoding="utf-8")

        # ... then run the currentness check under a different PATH.
        monkeypatch.setenv("PATH", "/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin")
        is_current = gateway_cli.launchd_plist_is_current()

        assert is_current is True
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# cmd_update — macOS launchd detection # cmd_update — macOS launchd detection
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -536,7 +549,7 @@ class TestServicePidExclusion:
gateway_cli, "_get_service_pids", return_value={SERVICE_PID} gateway_cli, "_get_service_pids", return_value={SERVICE_PID}
), patch.object( ), patch.object(
gateway_cli, "find_gateway_pids", gateway_cli, "find_gateway_pids",
side_effect=lambda exclude_pids=None: ( side_effect=lambda exclude_pids=None, all_profiles=False: (
[SERVICE_PID] if not exclude_pids else [SERVICE_PID] if not exclude_pids else
[p for p in [SERVICE_PID] if p not in exclude_pids] [p for p in [SERVICE_PID] if p not in exclude_pids]
), ),
@@ -579,7 +592,7 @@ class TestServicePidExclusion:
gateway_cli, "_get_service_pids", return_value={SERVICE_PID} gateway_cli, "_get_service_pids", return_value={SERVICE_PID}
), patch.object( ), patch.object(
gateway_cli, "find_gateway_pids", gateway_cli, "find_gateway_pids",
side_effect=lambda exclude_pids=None: ( side_effect=lambda exclude_pids=None, all_profiles=False: (
[SERVICE_PID] if not exclude_pids else [SERVICE_PID] if not exclude_pids else
[p for p in [SERVICE_PID] if p not in exclude_pids] [p for p in [SERVICE_PID] if p not in exclude_pids]
), ),
@@ -618,7 +631,7 @@ class TestServicePidExclusion:
launchctl_loaded=True, launchctl_loaded=True,
) )
def fake_find(exclude_pids=None): def fake_find(exclude_pids=None, all_profiles=False):
_exclude = exclude_pids or set() _exclude = exclude_pids or set()
return [p for p in [SERVICE_PID, MANUAL_PID] if p not in _exclude] return [p for p in [SERVICE_PID, MANUAL_PID] if p not in _exclude]
@@ -760,3 +773,28 @@ class TestFindGatewayPidsExclude:
pids = gateway_cli.find_gateway_pids() pids = gateway_cli.find_gateway_pids()
assert 100 in pids assert 100 in pids
assert 200 in pids assert 200 in pids
def test_filters_to_current_profile(self, monkeypatch, tmp_path):
    """Default lookup is scoped to the active profile; ``all_profiles`` lifts the filter."""
    profile_dir = tmp_path / ".hermes" / "profiles" / "orcha"
    profile_dir.mkdir(parents=True)

    monkeypatch.setattr(gateway_cli, "is_windows", lambda: False)
    monkeypatch.setattr(gateway_cli, "get_hermes_home", lambda: profile_dir)

    def fake_run(cmd, **kwargs):
        # Two gateway processes in "pid command" form: one for the active
        # profile (orcha) and one for a different profile (other).
        return subprocess.CompletedProcess(
            cmd, 0,
            stdout=(
                "100 /Users/dgrieco/.hermes/hermes-agent/venv/bin/python -m hermes_cli.main --profile orcha gateway run --replace\n"
                "200 /Users/dgrieco/.hermes/hermes-agent/venv/bin/python -m hermes_cli.main --profile other gateway run --replace\n"
            ),
            stderr="",
        )

    monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
    monkeypatch.setattr("os.getpid", lambda: 999)
    monkeypatch.setattr(gateway_cli, "_get_service_pids", lambda: set())
    monkeypatch.setattr(gateway_cli, "_profile_arg", lambda hermes_home=None: "--profile orcha")

    # Default: only the process started with the active profile is reported.
    pids = gateway_cli.find_gateway_pids()
    assert pids == [100]

    # all_profiles=True bypasses the profile filter, so both processes show up.
    assert gateway_cli.find_gateway_pids(all_profiles=True) == [100, 200]