mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-06 10:47:12 +08:00
Skips the ~145 ms AST scan of every tools/*.py file at every startup.
CLI cold import drops ~100 ms on a machine where Phase 1's other
lazy-imports already landed.
## What changed
tools/registry.py:_load_manifest() reads a committed tuple of tool
module names from tools/_manifest.py. discover_builtin_tools() tries
the manifest first; falls back to the AST scan only when the manifest
is missing (fresh checkout, or the generator hasn't been run yet) or
when the caller passes a non-default tools_dir (tests, embedders).
scripts/build_tool_manifest.py regenerates the manifest. Run manually
after adding a new tools/*.py, or let CI catch drift.
.github/workflows/tests.yml runs the generator with --check on every
PR. Rejects commits that add/remove a self-registering tool module
without updating the manifest.
tools/_manifest.py is the committed artifact, 27 built-in modules.
## Design decisions
No runtime mtime-drift check. Considered adding a stat walk of
tools/*.py at startup to auto-invalidate the manifest on dev machines,
rejected because:
- Adds overhead to the path we're trying to speed up (66 stats,
~5-10 ms worst case, defeats part of the savings).
- Gives false positives when devs edit tools/ helpers (ansi_strip,
approval, budget_config, path_security, etc.) that don't call
registry.register() — those edits wouldn't require a regen.
- CI check is strictly better: catches the real error class (manifest
missing a self-registering tool) without runtime cost, and developers
get feedback at PR time instead of silent divergence.
Manifest coverage: built-in tools/*.py ONLY. Plugin tools (via
ctx.register_tool) and MCP tools (registered dynamically on server
connect) use separate registration paths that this change doesn't
touch. End-user-visible behavior: unchanged — installing a plugin or
connecting an MCP server works exactly as before.
## Failure modes
1. Manifest lists a tool that no longer exists → importlib.import_module
raises, logged as warning, skipped. Non-fatal but noisy. CI check
catches this class before merge.
2. Manifest missing a new tool → the tool isn't registered at startup.
User-visible. CI --check catches before merge.
3. Manifest missing entirely (tools/_manifest.py deleted) → fallback to
AST scan, startup gets slower by ~145 ms but behavior is identical.
## Measurements
discover_builtin_tools() isolated:
without manifest (AST scan): 674 ms min, 702 ms median
with manifest: 524 ms min, 550 ms median
savings: 150 ms
Full 'import cli':
before (main): ~950 ms
after (this PR): ~846 ms
savings: ~100 ms
(The isolated savings is bigger than the cli delta because some AST
scan overhead was being double-paid via transitive imports; manifest
removes it once at the source.)
## Validation
- 121/121 tests/tools/test_registry + test_terminal_tool +
test_skills_tool pass (unchanged from main).
- 68 tools registered with manifest (matches baseline).
- CI --check flag verified: passes for the committed manifest, fails
with exit 1 when I drop a new tool into tools/ without regenerating.
- Live hermes chat smoke: 1 turn + /quit, agent successfully enumerated
all registered tools when asked to list them. 36-line log window,
zero errors.
137 lines
4.5 KiB
Python
Executable File
137 lines
4.5 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Generate ``tools/_manifest.py`` — a static list of built-in tool modules.
|
|
|
|
At runtime, ``tools.registry.discover_builtin_tools()`` reads this manifest
|
|
instead of AST-scanning every ``tools/*.py`` file to find ones that call
|
|
``registry.register()``. Saves ~145 ms at every CLI/gateway startup.
|
|
|
|
When to run:
|
|
- Automatically: the ``discover_builtin_tools`` fallback triggers when any
|
|
``tools/*.py`` file has an mtime newer than the manifest. This surfaces
|
|
a warning in dev. Run this script and commit the regenerated manifest
|
|
to silence the warning.
|
|
- Manually: ``python scripts/build_tool_manifest.py``
|
|
- CI: the build-tools GitHub workflow runs this and diff-checks on every PR.
|
|
|
|
Usage:
|
|
python scripts/build_tool_manifest.py # regenerate in place
|
|
python scripts/build_tool_manifest.py --check # exit 1 if stale (for CI)
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import ast
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
|
|
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
TOOLS_DIR = REPO_ROOT / "tools"
|
|
MANIFEST_PATH = TOOLS_DIR / "_manifest.py"
|
|
|
|
# Exclusions match tools/registry.py:discover_builtin_tools — these files live
|
|
# in tools/ but are infrastructure (not self-registering modules).
|
|
SKIP_FILES = {
|
|
"__init__.py",
|
|
"_manifest.py",
|
|
"registry.py",
|
|
"mcp_tool.py", # MCP registers dynamically at runtime, not at import
|
|
}
|
|
|
|
|
|
def _is_registry_register_call(node: ast.AST) -> bool:
|
|
"""Return True when *node* is a ``registry.register(...)`` call expression."""
|
|
if not isinstance(node, ast.Expr) or not isinstance(node.value, ast.Call):
|
|
return False
|
|
func = node.value.func
|
|
return (
|
|
isinstance(func, ast.Attribute)
|
|
and func.attr == "register"
|
|
and isinstance(func.value, ast.Name)
|
|
and func.value.id == "registry"
|
|
)
|
|
|
|
|
|
def _module_registers_tools(path: Path) -> bool:
|
|
try:
|
|
tree = ast.parse(path.read_text(encoding="utf-8"), filename=str(path))
|
|
except (OSError, SyntaxError):
|
|
return False
|
|
return any(_is_registry_register_call(stmt) for stmt in tree.body)
|
|
|
|
|
|
def scan_tool_modules() -> list[str]:
|
|
"""Return sorted list of ``tools.<stem>`` module names that self-register."""
|
|
return sorted(
|
|
f"tools.{path.stem}"
|
|
for path in TOOLS_DIR.glob("*.py")
|
|
if path.name not in SKIP_FILES and _module_registers_tools(path)
|
|
)
|
|
|
|
|
|
MANIFEST_HEADER = '''\
|
|
"""Auto-generated list of built-in tool modules that call ``registry.register()``.
|
|
|
|
DO NOT EDIT MANUALLY. Regenerate with:
|
|
|
|
python scripts/build_tool_manifest.py
|
|
|
|
This file is read at startup by ``tools.registry.discover_builtin_tools()`` to
|
|
skip the ~145 ms AST scan of every ``tools/*.py`` file. When a ``tools/*.py``
|
|
file is added, modified, or removed, the dev-mode mtime check in
|
|
``discover_builtin_tools`` will log a warning and fall back to the AST scan —
|
|
run this script to regenerate and commit.
|
|
|
|
Only covers *built-in* tools (shipped in ``tools/*.py``). Plugin tools and
|
|
MCP-registered tools use separate discovery paths and are not listed here.
|
|
"""
|
|
|
|
TOOL_MODULES: tuple[str, ...] = (
|
|
'''
|
|
|
|
MANIFEST_FOOTER = ")\n"
|
|
|
|
|
|
def render_manifest(modules: list[str]) -> str:
|
|
lines = [MANIFEST_HEADER]
|
|
for name in modules:
|
|
lines.append(f" {name!r},\n")
|
|
lines.append(MANIFEST_FOOTER)
|
|
return "".join(lines)
|
|
|
|
|
|
def main() -> int:
|
|
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
|
|
parser.add_argument(
|
|
"--check",
|
|
action="store_true",
|
|
help="Exit 1 if the on-disk manifest doesn't match what would be generated (for CI).",
|
|
)
|
|
args = parser.parse_args()
|
|
|
|
modules = scan_tool_modules()
|
|
new_content = render_manifest(modules)
|
|
|
|
if args.check:
|
|
if not MANIFEST_PATH.exists():
|
|
print(f"{MANIFEST_PATH} is missing — run: python scripts/build_tool_manifest.py", file=sys.stderr)
|
|
return 1
|
|
current = MANIFEST_PATH.read_text(encoding="utf-8")
|
|
if current != new_content:
|
|
print(
|
|
f"{MANIFEST_PATH} is stale — run: python scripts/build_tool_manifest.py",
|
|
file=sys.stderr,
|
|
)
|
|
return 1
|
|
print(f"OK: {MANIFEST_PATH} is up-to-date ({len(modules)} tool modules).")
|
|
return 0
|
|
|
|
MANIFEST_PATH.write_text(new_content, encoding="utf-8")
|
|
print(f"Wrote {MANIFEST_PATH} ({len(modules)} tool modules).")
|
|
return 0
|
|
|
|
|
|
if __name__ == "__main__":
|
|
raise SystemExit(main())
|