mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-03 17:27:37 +08:00
The audit of v4.1 surfaced ~70 issues across the five scripts and three
reference docs — most user-visible (silent file overwrites, status-error
misclassified as success, X-API-Key leaked to S3 on /api/view redirect,
Cloud endpoints that 404 because they were renamed). v5.0.0 fixes those
and fills the gaps that previously forced users to write their own glue
(WebSocket monitoring, batch/sweep, img2img upload helper, dep auto-fix,
log fetch, health check, example workflows).
Critical fixes
- run_workflow.py: poll_status now checks status_str==error BEFORE
completed:true, so a failed run no longer reports success
- run_workflow.py: download_output streams to disk via safe_path_join,
preserves server subfolder structure (no silent overwrites), and
retries with exponential backoff
- run_workflow.py: refuses to overwrite a link with a literal in
inject_params (would silently break wiring)
- _common.py: _StripSensitiveOnRedirectSession (a requests.Session
subclass that overrides rebuild_auth) drops X-API-Key/Cookie on cross-host
redirects — fixes a real key-leak path through Cloud's signed-URL
download flow. Tested.
- Cloud routing (verified live): /history → /history_v2,
/models/<f> → /experiment/models/<f>, plus folder aliases for the
unet ↔ diffusion_models and clip ↔ text_encoders rename
- check_deps.py: distinguishes 200/empty vs 404 folder_not_found vs
403 free-tier; emits concrete fix_command per missing dep
- extract_schema.py: prompt vs negative_prompt determined by tracing
KSampler.{positive,negative} connections (incl. through Reroute /
Primitive nodes) instead of meta-title heuristic; symmetric
duplicate-name resolution; cycle-safe trace_to_node
- hardware_check.py: multi-GPU pick-best, Apple variant detection,
Rosetta detection, WSL2, ROCm --json, disk-space check, optional
PyTorch probe; powershell preferred over deprecated wmic
- comfyui_setup.sh: prefers pipx → uvx → pip --user (with PEP-668
fallback); idempotent — skips relaunch if server already up;
configurable port/workspace; persistent log; SIGINT trap
New scripts
- run_batch.py — count or sweep (cartesian product), parallel up to
cloud tier limit
- ws_monitor.py — real-time WebSocket viewer; saves preview frames
- auto_fix_deps.py — runs comfy node install / model download for
whatever check_deps reports missing (with --dry-run)
- health_check.py — single command that runs the verification checklist
(comfy-cli + server + checkpoints + optional smoke test that cancels
itself to avoid burning compute)
- fetch_logs.py — pull traceback / status messages for a prompt_id
Coverage expansion
- Param patterns now cover Flux (BasicScheduler, BasicGuider,
RandomNoise, ModelSamplingFlux), SD3, Wan/Hunyuan/LTX video,
IPAdapter, rgthree, easy-use, AnimateDiff
- Embedding refs in CLIPTextEncode strings extracted as model deps
- ckpt_name / vae_name / lora_name / unet_name now controllable so
workflows can be retargeted per run
Examples
- workflows/{sd15,sdxl,flux_dev}_txt2img.json
- workflows/sdxl_{img2img,inpaint}.json
- workflows/upscale_4x.json
- workflows/{animatediff_video,wan_video_t2v}.json + README
Tests
- 117 tests (105 unit + 8 cloud integration + 4 cross-host security)
- Cloud tests auto-skip without COMFY_CLOUD_API_KEY; verified end-to-end
against live cloud API
Backwards compatibility
- All existing CLI flags continue to work; new behavior is opt-in
(--ws, --input-image, --randomize-seed, --flat-output, etc.)
224 lines
8.0 KiB
Python
Executable File
224 lines
8.0 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
health_check.py — One-stop verification that the ComfyUI environment is ready.

Runs through the verification checklist:
  1. comfy-cli on PATH
  2. server reachable (/system_stats)
  3. at least one checkpoint installed
  4. (optional) a specific workflow's deps are met
  5. (optional) actually submit a tiny test workflow and verify the server
     accepts it (the job is cancelled right after submission)

Usage:
    python3 health_check.py
    python3 health_check.py --host https://cloud.comfy.org
    python3 health_check.py --workflow my.json
    python3 health_check.py --smoke-test  # actually submit a tiny workflow
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import shutil
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
|
from _common import ( # noqa: E402
|
|
DEFAULT_LOCAL_HOST, ENV_API_KEY, emit_json, http_get, parse_model_list,
|
|
resolve_api_key, resolve_url, unwrap_workflow,
|
|
)
|
|
|
|
|
|
def comfy_cli_status() -> dict:
    """Report how (or whether) the ``comfy`` CLI can be invoked.

    Returns:
        A dict with an ``available`` flag. When a ``comfy`` executable is on
        PATH the dict also carries ``method="comfy"`` and its ``path``; when
        only ``uvx`` is on PATH it carries ``method="uvx"`` and an invocation
        ``hint``; otherwise it carries an installation ``hint``.
    """
    comfy_path = shutil.which("comfy")
    if comfy_path:
        return {"available": True, "method": "comfy", "path": comfy_path}

    # No direct executable: uvx can still run comfy-cli on demand.
    if not shutil.which("uvx"):
        return {
            "available": False,
            "hint": "Install with: pipx install comfy-cli (or `pip install comfy-cli`)",
        }

    return {
        "available": True,
        "method": "uvx",
        "hint": "Invoke as `uvx --from comfy-cli comfy ...`",
    }
|
|
|
|
|
|
def server_status(host: str, headers: dict) -> dict:
    """Probe the server's ``/system_stats`` endpoint.

    Args:
        host: Base host passed to ``resolve_url``.
        headers: HTTP headers forwarded to ``http_get``.

    Returns:
        On HTTP 200: ``{"reachable": True, "url", "stats"}`` where ``stats``
        is the decoded JSON body, or ``{}`` when the body is falsy or cannot
        be decoded. On any other status: ``reachable=False`` with
        ``http_status`` and the first 200 characters of the body. If the
        request (or reading the response) raises: ``reachable=False`` with
        the exception text under ``error``.
    """
    url = resolve_url(host, "/system_stats")
    try:
        resp = http_get(url, headers=headers, retries=2, timeout=10)
        if resp.status != 200:
            return {
                "reachable": False,
                "url": url,
                "http_status": resp.status,
                "body": resp.text()[:200],
            }
        # A 200 with an unreadable body still counts as reachable.
        try:
            payload = resp.json() or {}
        except Exception:
            payload = {}
        return {"reachable": True, "url": url, "stats": payload}
    except Exception as exc:
        return {"reachable": False, "url": url, "error": str(exc)}
|
|
|
|
|
|
def checkpoint_status(host: str, headers: dict) -> dict:
    """Query the server's ``/models/checkpoints`` listing.

    Args:
        host: Base host passed to ``resolve_url``.
        headers: HTTP headers forwarded to ``http_get``.

    Returns:
        ``{"queryable": False, "error"}`` when the request raises;
        ``{"queryable": False, "http_status", "url", "body"}`` (body cut to
        200 characters) on a non-200 reply; otherwise
        ``{"queryable": True, "count", "first_few"}`` where ``first_few``
        holds at most five names in sorted order. A body that cannot be
        decoded or parsed is treated as an empty model set.
    """
    url = resolve_url(host, "/models/checkpoints")

    try:
        resp = http_get(url, headers=headers, retries=2, timeout=15)
    except Exception as exc:
        return {"queryable": False, "error": str(exc)}

    if resp.status != 200:
        return {
            "queryable": False,
            "http_status": resp.status,
            "url": url,
            "body": resp.text()[:200],
        }

    try:
        names = parse_model_list(resp.json())
    except Exception:
        # Unparseable listing: report zero checkpoints instead of failing.
        names = set()

    total = len(names)
    preview = sorted(names)[:5]
    return {"queryable": True, "count": total, "first_few": preview}
|
|
|
|
|
|
# Minimal SD1.5 workflow that doesn't depend on rare nodes.
# 256x256 + 1 step is the smallest config that doesn't trigger SDXL/Flux
# validation errors while still executing fast.
# Keys are node ids; a two-item list such as ["4", 0] links an input to
# output slot 0 of node "4". The checkpoint name is a placeholder that
# must be replaced before submission.
SMOKE_WORKFLOW = {
    "3": {
        "class_type": "KSampler",
        "inputs": {
            "seed": 1,
            "steps": 1,
            "cfg": 7.0,
            "sampler_name": "euler",
            "scheduler": "normal",
            "denoise": 1.0,
            "model": ["4", 0],
            "positive": ["6", 0],
            "negative": ["7", 0],
            "latent_image": ["5", 0],
        },
    },
    "4": {
        "class_type": "CheckpointLoaderSimple",
        "inputs": {"ckpt_name": "REPLACE_ME"},
    },
    "5": {
        "class_type": "EmptyLatentImage",
        "inputs": {"width": 256, "height": 256, "batch_size": 1},
    },
    "6": {
        "class_type": "CLIPTextEncode",
        "inputs": {"text": "test", "clip": ["4", 1]},
    },
    "7": {
        "class_type": "CLIPTextEncode",
        "inputs": {"text": "", "clip": ["4", 1]},
    },
    "9": {
        "class_type": "SaveImage",
        "inputs": {"filename_prefix": "smoke", "images": ["3", 0]},
    },
}
|
|
|
|
|
|
def smoke_test(host: str, headers: dict, ckpt_name: str | None) -> dict:
|
|
"""Submit a tiny workflow and verify the server accepts it.
|
|
|
|
Cancels the job immediately after acceptance so we don't burn GPU
|
|
time / cloud minutes on a smoke test.
|
|
"""
|
|
if not ckpt_name:
|
|
return {"ran": False, "reason": "no checkpoint available"}
|
|
wf = json.loads(json.dumps(SMOKE_WORKFLOW))
|
|
wf["4"]["inputs"]["ckpt_name"] = ckpt_name
|
|
|
|
# Lazy import to avoid circular issues
|
|
from run_workflow import ComfyRunner
|
|
api_key = headers.get("X-API-Key")
|
|
runner = ComfyRunner(host=host, api_key=api_key)
|
|
sub = runner.submit(wf)
|
|
if "_http_error" in sub:
|
|
return {"ran": True, "submitted": False,
|
|
"http_status": sub["_http_error"], "body": sub.get("body")}
|
|
pid = sub.get("prompt_id")
|
|
if not pid:
|
|
return {"ran": True, "submitted": False, "response": sub}
|
|
|
|
# Cancel so we don't actually waste compute on the smoke test.
|
|
cancelled = False
|
|
try:
|
|
cancelled = runner.cancel(pid)
|
|
except Exception:
|
|
pass
|
|
|
|
return {
|
|
"ran": True, "submitted": True, "prompt_id": pid,
|
|
"cancelled_after_submit": cancelled,
|
|
"note": "Submission accepted; cancelled to avoid running the full pipeline.",
|
|
}
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
    """Run the health checks, print a JSON report, and return an exit code.

    Args:
        argv: Command-line arguments; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        0 when the verdict is "pass", or "warn" without ``--strict``;
        1 when the verdict is "fail", or "warn" with ``--strict``.
    """
    p = argparse.ArgumentParser(description="One-stop ComfyUI health check")
    p.add_argument("--host", default=DEFAULT_LOCAL_HOST)
    p.add_argument("--api-key", help=f"or set ${ENV_API_KEY}")
    p.add_argument("--workflow", help="Optional: also run check_deps on this workflow")
    p.add_argument("--smoke-test", action="store_true",
                   help="Submit a tiny test workflow and verify round-trip")
    p.add_argument("--strict", action="store_true",
                   help="Exit non-zero on any non-pass condition (including warnings)")
    args = p.parse_args(argv)

    api_key = resolve_api_key(args.api_key)
    headers = {"X-API-Key": api_key} if api_key else {}

    cli = comfy_cli_status()
    server = server_status(args.host, headers)
    # The checkpoint listing is only attempted when the server answered.
    ckpts = checkpoint_status(args.host, headers) if server.get("reachable") else None

    # ---- workflow check ----
    workflow_check: dict | None = None
    if args.workflow:
        wf_path = Path(args.workflow).expanduser()
        if not wf_path.exists():
            workflow_check = {"error": "workflow file not found"}
        else:
            try:
                # JSON is UTF-8 by specification; don't depend on the locale.
                with wf_path.open(encoding="utf-8") as f:
                    workflow = unwrap_workflow(json.load(f))
                from check_deps import check_deps
                workflow_check = check_deps(workflow, host=args.host, api_key=api_key)
            except (OSError, ValueError) as e:
                # OSError: the path exists but can't be read (a directory,
                # permission denied, ...). ValueError covers JSON decode
                # errors (JSONDecodeError is a ValueError subclass).
                workflow_check = {"error": str(e)}

    smoke = None
    if args.smoke_test and server.get("reachable"):
        first_ckpt = ckpts["first_few"][0] if ckpts and ckpts.get("first_few") else None
        smoke = smoke_test(args.host, headers, first_ckpt)

    # ---- verdict ----
    # "fail" is sticky; "warn" only ever upgrades a "pass".
    verdict = "pass"
    reasons: list[str] = []
    if not server.get("reachable"):
        verdict = "fail"
        reasons.append("server unreachable")
    if ckpts and ckpts.get("queryable") and ckpts.get("count", 0) == 0:
        verdict = "warn" if verdict == "pass" else verdict
        reasons.append("no checkpoints installed")
    if workflow_check and workflow_check.get("error"):
        verdict = "fail"
        reasons.append(f"workflow check failed: {workflow_check['error']}")
    elif workflow_check and not workflow_check.get("is_ready"):
        if workflow_check.get("node_check_skipped"):
            # Informational only: the verdict is left untouched.
            reasons.append("node check skipped (cloud free tier)")
        else:
            verdict = "fail"
            reasons.append("workflow has missing deps")
    if smoke and smoke.get("ran") and not smoke.get("submitted"):
        verdict = "fail"
        reasons.append("smoke-test submission failed")
    if not cli.get("available"):
        verdict = "warn" if verdict == "pass" else verdict
        reasons.append("comfy-cli not on PATH (lifecycle commands won't work)")

    report = {
        "verdict": verdict,
        "reasons": reasons,
        "host": args.host,
        "comfy_cli": cli,
        "server": server,
        "checkpoints": ckpts,
        "workflow_check": workflow_check,
        "smoke_test": smoke,
    }
    emit_json(report)

    if verdict == "pass":
        return 0
    if verdict == "warn":
        return 1 if args.strict else 0
    return 1
|
|
|
|
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
|