mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-10 12:18:44 +08:00
Compare commits
3 Commits
salvage/40
...
hermes/gat
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8825ad20c1 | ||
|
|
152207c0cc | ||
|
|
e643c79c2c |
@@ -207,9 +207,11 @@ def _graceful_restart_via_sigusr1(pid: int, drain_timeout: float) -> bool:
|
||||
|
||||
SIGUSR1 is wired in gateway/run.py to ``request_restart(via_service=True)``
|
||||
which drains in-flight agent runs (up to ``agent.restart_drain_timeout``
|
||||
seconds), then exits with code 75. Both systemd (``Restart=always``
|
||||
+ ``RestartForceExitStatus=75``) and launchd (``KeepAlive.SuccessfulExit
|
||||
= false``) relaunch the process after the graceful exit.
|
||||
seconds), then exits with code 75. Systemd units generated by Hermes use
|
||||
``Restart=on-failure`` together with ``RestartForceExitStatus=75`` so the
|
||||
service is relaunched after the graceful exit without reviving clean
|
||||
``--replace`` takeovers. launchd still uses ``KeepAlive.SuccessfulExit =
|
||||
false`` for the same relaunch behavior.
|
||||
|
||||
This is the drain-aware alternative to ``systemctl restart`` / ``SIGTERM``,
|
||||
which SIGKILL in-flight agents after a short timeout.
|
||||
@@ -565,7 +567,7 @@ def _gateway_run_args_for_profile(profile: str) -> list[str]:
|
||||
args = [get_python_path(), "-m", "hermes_cli.main"]
|
||||
if profile != "default":
|
||||
args.extend(["--profile", profile])
|
||||
args.extend(["gateway", "run", "--replace"])
|
||||
args.extend(["gateway", "run"])
|
||||
return args
|
||||
|
||||
|
||||
@@ -2240,7 +2242,7 @@ StartLimitIntervalSec=0
|
||||
Type=simple
|
||||
User={username}
|
||||
Group={group_name}
|
||||
ExecStart={python_path} -m hermes_cli.main{f" {profile_arg}" if profile_arg else ""} gateway run --replace
|
||||
ExecStart={python_path} -m hermes_cli.main{f" {profile_arg}" if profile_arg else ""} gateway run
|
||||
WorkingDirectory={working_dir}
|
||||
Environment="HOME={home_dir}"
|
||||
Environment="USER={username}"
|
||||
@@ -2248,7 +2250,7 @@ Environment="LOGNAME={username}"
|
||||
Environment="PATH={sane_path}"
|
||||
Environment="VIRTUAL_ENV={venv_dir}"
|
||||
Environment="HERMES_HOME={hermes_home}"
|
||||
Restart=always
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
RestartMaxDelaySec=300
|
||||
RestartSteps=5
|
||||
@@ -2278,12 +2280,12 @@ StartLimitIntervalSec=0
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
ExecStart={python_path} -m hermes_cli.main{f" {profile_arg}" if profile_arg else ""} gateway run --replace
|
||||
ExecStart={python_path} -m hermes_cli.main{f" {profile_arg}" if profile_arg else ""} gateway run
|
||||
WorkingDirectory={working_dir}
|
||||
Environment="PATH={sane_path}"
|
||||
Environment="VIRTUAL_ENV={venv_dir}"
|
||||
Environment="HERMES_HOME={hermes_home}"
|
||||
Restart=always
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
RestartMaxDelaySec=300
|
||||
RestartSteps=5
|
||||
@@ -2875,7 +2877,6 @@ def generate_launchd_plist() -> str:
|
||||
prog_args.extend([
|
||||
"<string>gateway</string>",
|
||||
"<string>run</string>",
|
||||
"<string>--replace</string>",
|
||||
])
|
||||
prog_args_xml = "\n ".join(prog_args)
|
||||
|
||||
@@ -3270,7 +3271,7 @@ def run_gateway(verbose: int = 0, quiet: bool = False, replace: bool = False):
|
||||
print()
|
||||
|
||||
# Exit with code 1 if gateway fails to connect any platform,
|
||||
# so systemd Restart=always will retry on transient errors
|
||||
# so systemd Restart=on-failure will retry on transient errors
|
||||
verbosity = None if quiet else verbose
|
||||
|
||||
# ── Exit-path diagnostics ────────────────────────────────────────────
|
||||
|
||||
@@ -535,7 +535,7 @@
|
||||
|
||||
restart = mkOption {
|
||||
type = types.str;
|
||||
default = "always";
|
||||
default = "on-failure";
|
||||
description = "systemd Restart= policy.";
|
||||
};
|
||||
|
||||
@@ -974,7 +974,7 @@
|
||||
--env MESSAGING_CWD=${containerWorkDir} \
|
||||
${lib.concatStringsSep " " cfg.container.extraOptions} \
|
||||
${cfg.container.image} \
|
||||
${containerDataDir}/current-package/bin/hermes gateway run --replace ${lib.concatStringsSep " " cfg.extraArgs}
|
||||
${containerDataDir}/current-package/bin/hermes gateway run ${lib.concatStringsSep " " cfg.extraArgs}
|
||||
|
||||
echo "${containerIdentity}" > ${identityFile}
|
||||
fi
|
||||
|
||||
@@ -326,6 +326,8 @@ class TestGeneratedSystemdUnits:
|
||||
assert "ExecStart=" in unit
|
||||
assert "ExecStop=" not in unit
|
||||
assert "ExecReload=/bin/kill -USR1 $MAINPID" in unit
|
||||
assert "Restart=on-failure" in unit
|
||||
assert "Restart=always" not in unit
|
||||
assert f"RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}" in unit
|
||||
# TimeoutStopSec must exceed the default drain_timeout (60s) so
|
||||
# systemd doesn't SIGKILL the cgroup before post-interrupt cleanup
|
||||
@@ -387,6 +389,8 @@ class TestGeneratedSystemdUnits:
|
||||
assert "ExecStart=" in unit
|
||||
assert "ExecStop=" not in unit
|
||||
assert "ExecReload=/bin/kill -USR1 $MAINPID" in unit
|
||||
assert "Restart=on-failure" in unit
|
||||
assert "Restart=always" not in unit
|
||||
assert f"RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}" in unit
|
||||
# TimeoutStopSec must exceed the default drain_timeout (60s) so
|
||||
# systemd doesn't SIGKILL the cgroup before post-interrupt cleanup
|
||||
@@ -493,7 +497,10 @@ class TestLaunchdServiceRecovery:
|
||||
|
||||
label = gateway_cli.get_launchd_label()
|
||||
domain = gateway_cli._launchd_domain()
|
||||
assert "--replace" in plist_path.read_text(encoding="utf-8")
|
||||
plist_text = plist_path.read_text(encoding="utf-8")
|
||||
assert "<string>gateway</string>" in plist_text
|
||||
assert "<string>run</string>" in plist_text
|
||||
assert "--replace" not in plist_text
|
||||
assert calls[:2] == [
|
||||
["launchctl", "bootout", f"{domain}/{label}"],
|
||||
["launchctl", "bootstrap", domain, str(plist_path)],
|
||||
@@ -1616,7 +1623,8 @@ class TestProfileArg:
|
||||
monkeypatch.setattr(gateway_cli, "get_hermes_home", lambda: profile_dir)
|
||||
unit = gateway_cli.generate_systemd_unit(system=False)
|
||||
assert "--profile mybot" in unit
|
||||
assert "gateway run --replace" in unit
|
||||
assert "gateway run" in unit
|
||||
assert "--replace" not in unit
|
||||
|
||||
def test_launchd_plist_includes_profile(self, tmp_path, monkeypatch):
|
||||
"""generate_launchd_plist should include --profile in ProgramArguments for named profiles."""
|
||||
@@ -1628,6 +1636,24 @@ class TestProfileArg:
|
||||
plist = gateway_cli.generate_launchd_plist()
|
||||
assert "<string>--profile</string>" in plist
|
||||
assert "<string>mybot</string>" in plist
|
||||
assert "<string>--replace</string>" not in plist
|
||||
|
||||
def test_gateway_run_args_for_profile_omit_replace(self, monkeypatch):
|
||||
monkeypatch.setattr(gateway_cli, "get_python_path", lambda: "/venv/bin/python")
|
||||
|
||||
default_args = gateway_cli._gateway_run_args_for_profile("default")
|
||||
named_args = gateway_cli._gateway_run_args_for_profile("mybot")
|
||||
|
||||
assert default_args == ["/venv/bin/python", "-m", "hermes_cli.main", "gateway", "run"]
|
||||
assert named_args == [
|
||||
"/venv/bin/python",
|
||||
"-m",
|
||||
"hermes_cli.main",
|
||||
"--profile",
|
||||
"mybot",
|
||||
"gateway",
|
||||
"run",
|
||||
]
|
||||
|
||||
def test_launchd_plist_path_uses_real_user_home_not_profile_home(self, tmp_path, monkeypatch):
|
||||
profile_dir = tmp_path / ".hermes" / "profiles" / "orcha"
|
||||
|
||||
Reference in New Issue
Block a user