"""Helpers for locating, enumerating, indexing and searching the Hermes workspace."""

from __future__ import annotations

import fnmatch
import json
import os
import re
from dataclasses import asdict, dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Iterable

from hermes_cli.config import get_hermes_home, load_config

# Sub-directories created under the workspace root when paths are ensured.
DEFAULT_WORKSPACE_SUBDIRS = ("docs", "notes", "data", "code", "uploads", "media")

# Suffixes treated as binary without looking at the file contents.
_BINARY_SUFFIXES = {
    ".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".ico", ".pdf",
    ".zip", ".gz", ".tar", ".xz", ".7z", ".mp3", ".wav", ".ogg", ".mp4",
    ".mov", ".avi", ".sqlite", ".db", ".bin", ".exe", ".dll", ".so", ".dylib",
    ".woff", ".woff2", ".ttf", ".otf",
}


@dataclass
class WorkspacePaths:
    """Resolved filesystem locations for the workspace and its knowledgebase."""

    # Root directory that holds the user's workspace files.
    workspace_root: Path
    # Root directory for knowledgebase artefacts.
    knowledgebase_root: Path
    # ``<knowledgebase_root>/indexes``
    indexes_dir: Path
    # ``<knowledgebase_root>/manifests``
    manifests_dir: Path
    # ``<knowledgebase_root>/cache``
    cache_dir: Path
    # ``<manifests_dir>/workspace.json``
    manifest_path: Path


@dataclass
class WorkspaceEntry:
    """Metadata recorded for one workspace file."""

    # POSIX-style path relative to the workspace root.
    relative_path: str
    # File size reported by ``stat``.
    size_bytes: int
    # Modification time as a UTC ISO-8601 string.
    modified_at: str
    # Coarse MIME type derived from the file suffix (see ``_mime_for``).
    mime_type: str


def _utc_now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()


def _ensure_config(config: dict[str, Any] | None = None) -> dict[str, Any]:
    """Return *config* unchanged, or load the Hermes config when it is ``None``."""
    if config is not None:
        return config
    return load_config()


def _resolve_root(raw_path: str | None, fallback_name: str) -> Path:
    """Resolve a configured root directory.

    A non-empty *raw_path* has ``~`` and environment variables expanded and is
    then resolved. Otherwise the root is ``<hermes home>/<fallback_name>``.
    """
    if raw_path:
        expanded = os.path.expandvars(os.path.expanduser(raw_path))
        return Path(expanded).resolve()
    return (get_hermes_home() / fallback_name).resolve()


def get_workspace_paths(config: dict[str, Any] | None = None, ensure: bool = False) -> WorkspacePaths:
    """Compute the workspace/knowledgebase paths from *config*.

    Args:
        config: Hermes config mapping; loaded via ``load_config`` when ``None``.
        ensure: When true, create the workspace root, its default
            sub-directories and the knowledgebase directories if missing.

    Returns:
        The resolved :class:`WorkspacePaths`.
    """
    cfg = _ensure_config(config)
    workspace_cfg = cfg.get("workspace", {}) or {}
    kb_cfg = cfg.get("knowledgebase", {}) or {}

    workspace_root = _resolve_root(workspace_cfg.get("path"), "workspace")
    knowledgebase_root = _resolve_root(kb_cfg.get("path"), "knowledgebase")
    manifests_dir = knowledgebase_root / "manifests"
    paths = WorkspacePaths(
        workspace_root=workspace_root,
        knowledgebase_root=knowledgebase_root,
        indexes_dir=knowledgebase_root / "indexes",
        manifests_dir=manifests_dir,
        cache_dir=knowledgebase_root / "cache",
        manifest_path=manifests_dir / "workspace.json",
    )

    if ensure:
        required_dirs = [
            workspace_root,
            *(workspace_root / subdir for subdir in DEFAULT_WORKSPACE_SUBDIRS),
            knowledgebase_root,
            paths.indexes_dir,
            paths.manifests_dir,
            paths.cache_dir,
        ]
        for directory in required_dirs:
            directory.mkdir(parents=True, exist_ok=True)

    return paths


def _workspace_enabled(config: dict[str, Any]) -> bool:
    """Return whether ``workspace.enabled`` is truthy (defaults to enabled)."""
    workspace_cfg = config.get("workspace", {}) or {}
    return bool(workspace_cfg.get("enabled", True))


def _load_ignore_patterns(workspace_root: Path, include_hidden: bool = False) -> list[str]:
    """Read ignore patterns from ``<workspace_root>/.hermesignore``.

    Blank lines and lines starting with ``#`` are dropped. An empty list is
    returned when the file is missing or when *include_hidden* is true.

    NOTE(review): ``include_hidden=True`` also bypasses ``.hermesignore``
    entirely; that coupling is preserved here -- confirm it is intended.
    """
    if include_hidden:
        return []
    ignore_file = workspace_root / ".hermesignore"
    # is_file() rather than exists(): a directory with this name cannot be read.
    if not ignore_file.is_file():
        return []
    raw = ignore_file.read_text(encoding="utf-8", errors="ignore")
    stripped_lines = (line.strip() for line in raw.splitlines())
    return [line for line in stripped_lines if line and not line.startswith("#")]


def _is_hidden_rel(rel_path: Path) -> bool:
    """Return True when any component of *rel_path* starts with a dot."""
    return any(part.startswith(".") for part in rel_path.parts)


def _matches_ignore(rel_posix: str, patterns: Iterable[str]) -> bool:
    """Return True when *rel_posix* is excluded by any ignore pattern.

    A pattern (with trailing ``/`` removed) excludes a path when it
    ``fnmatch``-es the whole relative path, any single path component, or any
    ancestor directory path, or when it is a literal directory prefix of the
    path. Matching components and ancestors means a bare name such as
    ``node_modules`` or ``__pycache__/`` excludes that directory at any depth,
    not only directly under the workspace root.
    """
    segments = [segment for segment in rel_posix.split("/") if segment]
    # Ancestor directories of the file: "a", "a/b" for "a/b/c.txt".
    ancestor_dirs = ["/".join(segments[:depth]) for depth in range(1, len(segments))]

    for pattern in patterns:
        normalized = pattern.rstrip("/")
        if not normalized:
            continue
        if fnmatch.fnmatch(rel_posix, normalized):
            return True
        # Literal prefix check keeps directory names containing glob
        # metacharacters (e.g. "[") working.
        if rel_posix.startswith(normalized + "/"):
            return True
        if any(fnmatch.fnmatch(segment, normalized) for segment in segments):
            return True
        if any(fnmatch.fnmatch(ancestor, normalized) for ancestor in ancestor_dirs):
            return True
    return False


def _iter_workspace_files(paths: WorkspacePaths, config: dict[str, Any], include_hidden: bool = False) -> Iterable[Path]:
    """Yield indexable files under the workspace root in sorted order.

    Skipped: non-files, the root ``.hermesignore`` itself, hidden paths (unless
    *include_hidden*), paths matching the ignore patterns, files larger than
    ``knowledgebase.indexing.max_file_mb`` (default 10) and files that cannot
    be stat-ed.

    Raises:
        ValueError: If ``max_file_mb`` is not numeric.
    """
    kb_cfg = config.get("knowledgebase", {}) or {}
    indexing_cfg = kb_cfg.get("indexing", {}) or {}
    # float() so fractional limits such as 0.5 MB are honoured; int() would
    # truncate them to 0 and exclude every non-empty file.
    max_file_mb = float(indexing_cfg.get("max_file_mb", 10) or 10)
    max_file_bytes = int(max_file_mb * 1024 * 1024)
    patterns = _load_ignore_patterns(paths.workspace_root, include_hidden=include_hidden)

    for file_path in sorted(paths.workspace_root.rglob("*")):
        if not file_path.is_file():
            continue
        rel_path = file_path.relative_to(paths.workspace_root)
        rel_posix = rel_path.as_posix()
        if rel_posix == ".hermesignore":
            continue
        if not include_hidden and _is_hidden_rel(rel_path):
            continue
        if _matches_ignore(rel_posix, patterns):
            continue
        try:
            too_large = file_path.stat().st_size > max_file_bytes
        except OSError:
            continue
        if not too_large:
            yield file_path


def _mime_for(path: Path) -> str:
    """Map a file suffix to a coarse MIME type."""
    ext = path.suffix.lower()
    if ext == ".md":
        return "text/markdown"
    if ext in {".txt", ".py", ".js", ".ts", ".json", ".yaml", ".yml", ".toml", ".rst"}:
        return "text/plain"
    return "application/octet-stream"


def _entry_for(path: Path, root: Path) -> WorkspaceEntry:
    """Build the :class:`WorkspaceEntry` for *path*, relative to *root*.

    Raises:
        OSError: If *path* cannot be stat-ed.
    """
    stat_result = path.stat()
    modified = datetime.fromtimestamp(stat_result.st_mtime, tz=timezone.utc)
    return WorkspaceEntry(
        relative_path=path.relative_to(root).as_posix(),
        size_bytes=stat_result.st_size,
        modified_at=modified.isoformat(),
        mime_type=_mime_for(path),
    )


def build_workspace_manifest(config: dict[str, Any] | None = None) -> dict[str, Any]:
    """Enumerate the workspace and write the JSON manifest.

    Returns:
        ``{"success": False, "error": ...}`` when the workspace is disabled;
        otherwise the manifest payload that was written to ``manifest_path``.
    """
    cfg = _ensure_config(config)
    if not _workspace_enabled(cfg):
        return {"success": False, "error": "Workspace is disabled in config."}

    paths = get_workspace_paths(cfg, ensure=True)
    entries: list[WorkspaceEntry] = []
    for path in _iter_workspace_files(paths, cfg):
        try:
            entries.append(_entry_for(path, paths.workspace_root))
        except OSError:
            # The file vanished or became unreadable after enumeration; skip
            # it the same way _iter_workspace_files skips unstat-able files
            # instead of aborting the whole index run.
            continue

    payload = {
        "success": True,
        "generated_at": _utc_now_iso(),
        "workspace_root": str(paths.workspace_root),
        "knowledgebase_root": str(paths.knowledgebase_root),
        "manifest_path": str(paths.manifest_path),
        "file_count": len(entries),
        "files": [asdict(entry) for entry in entries],
    }
    paths.manifest_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    return payload
def _resolve_base(workspace_root: Path, relative_path: str) -> tuple[Path | None, str | None]:
    """Resolve *relative_path* inside *workspace_root*.

    Returns:
        ``(base, None)`` on success, or ``(None, error_message)`` when the
        resolved path leaves the workspace root or does not exist.
    """
    base = workspace_root
    if relative_path:
        candidate = (workspace_root / relative_path).resolve()
        try:
            candidate.relative_to(workspace_root)
        except ValueError:
            return None, "Requested path escapes workspace root."
        base = candidate
    if not base.exists():
        return None, f"Workspace path not found: {relative_path}"
    return base, None


def _walk_visible_files(
    base: Path,
    workspace_root: Path,
    patterns: list[str],
    recursive: bool = True,
    include_hidden: bool = False,
) -> Iterable[tuple[Path, Path]]:
    """Yield ``(absolute_path, path_relative_to_root)`` for files under *base*.

    *base* may itself be a file, in which case only that file is considered.
    Hidden paths (unless *include_hidden*) and paths matching *patterns* are
    skipped. Results are in sorted path order.
    """
    if base.is_file():
        # rglob()/iterdir() cannot enumerate a file; treat it as a one-item scope.
        candidates = [base]
    elif recursive:
        candidates = sorted(base.rglob("*"))
    else:
        candidates = sorted(base.iterdir())

    for path in candidates:
        if not path.is_file():
            continue
        rel = path.relative_to(workspace_root)
        if not include_hidden and _is_hidden_rel(rel):
            continue
        if _matches_ignore(rel.as_posix(), patterns):
            continue
        yield path, rel


def _paginate(items: list[dict[str, Any]], limit: int | None, offset: int | None, default_limit: int) -> list[dict[str, Any]]:
    """Return the ``offset``/``limit`` window of *items*.

    ``None`` falls back to ``default_limit`` / offset 0 and negative values
    are clamped to 0, so the slice never wraps around from the end.
    """
    start = max(0, int(offset or 0))
    size = default_limit if limit is None else max(0, int(limit))
    return items[start:start + size]


def workspace_status(config: dict[str, Any] | None = None) -> dict[str, Any]:
    """Summarise the workspace: roots, manifest location and file counts.

    ``category_counts`` maps the first path component of each indexable file
    (a top-level directory, or the file name for files at the root) to the
    number of files below it.
    """
    cfg = _ensure_config(config)
    if not _workspace_enabled(cfg):
        return {"success": False, "error": "Workspace is disabled in config."}

    paths = get_workspace_paths(cfg, ensure=True)
    category_counts: dict[str, int] = {}
    file_count = 0
    # Only the relative path is needed here, so no per-file stat is performed.
    for path in _iter_workspace_files(paths, cfg):
        top = path.relative_to(paths.workspace_root).parts[0]
        category_counts[top] = category_counts.get(top, 0) + 1
        file_count += 1

    return {
        "success": True,
        "workspace_root": str(paths.workspace_root),
        "knowledgebase_root": str(paths.knowledgebase_root),
        "manifest_path": str(paths.manifest_path),
        "manifest_exists": paths.manifest_path.exists(),
        "file_count": file_count,
        "category_counts": category_counts,
        "default_subdirs": list(DEFAULT_WORKSPACE_SUBDIRS),
    }


def workspace_list(
    config: dict[str, Any] | None = None,
    relative_path: str = "",
    recursive: bool = True,
    limit: int = 100,
    offset: int = 0,
    include_hidden: bool = False,
) -> dict[str, Any]:
    """List workspace files, optionally scoped to *relative_path*.

    Args:
        config: Hermes config mapping; loaded when ``None``.
        relative_path: Sub-path (directory or single file) inside the workspace.
        recursive: Descend into sub-directories when true.
        limit: Maximum number of entries returned (``None`` means 100).
        offset: Number of leading entries to skip.
        include_hidden: Include dot-paths; this also disables ``.hermesignore``
            (see ``_load_ignore_patterns``).

    Returns:
        A dict with ``success`` and either ``error`` or the page of
        ``entries`` plus ``count`` / ``total_count``.
    """
    cfg = _ensure_config(config)
    if not _workspace_enabled(cfg):
        return {"success": False, "error": "Workspace is disabled in config."}

    paths = get_workspace_paths(cfg, ensure=True)
    base, error = _resolve_base(paths.workspace_root, relative_path)
    if base is None:
        return {"success": False, "error": error}

    patterns = _load_ignore_patterns(paths.workspace_root, include_hidden=include_hidden)
    entries: list[dict[str, Any]] = []
    for path, _rel in _walk_visible_files(
        base, paths.workspace_root, patterns, recursive=recursive, include_hidden=include_hidden
    ):
        try:
            entries.append(asdict(_entry_for(path, paths.workspace_root)))
        except OSError:
            # File disappeared between enumeration and stat; leave it out.
            continue

    sliced = _paginate(entries, limit, offset, default_limit=100)
    return {
        "success": True,
        "workspace_root": str(paths.workspace_root),
        "base_path": str(base),
        "count": len(sliced),
        "total_count": len(entries),
        "entries": sliced,
    }


def _is_probably_binary(path: Path) -> bool:
    """Heuristically decide whether *path* is a binary file.

    True when the suffix is in ``_BINARY_SUFFIXES``, when the file cannot be
    read, or when a NUL byte occurs in its first 1024 bytes.
    """
    if path.suffix.lower() in _BINARY_SUFFIXES:
        return True
    try:
        # Read only the probe window instead of loading the whole file.
        with path.open("rb") as handle:
            head = handle.read(1024)
    except OSError:
        return True
    return b"\x00" in head


def workspace_search(
    query: str,
    config: dict[str, Any] | None = None,
    relative_path: str = "",
    file_glob: str | None = None,
    limit: int = 20,
    offset: int = 0,
    include_hidden: bool = False,
) -> dict[str, Any]:
    """Regex-search the text files of the workspace line by line.

    Args:
        query: Regular expression applied to each line with ``re.search``.
        config: Hermes config mapping; loaded when ``None``.
        relative_path: Sub-path (directory or single file) to search within.
        file_glob: Optional ``fnmatch`` pattern applied to file names.
        limit: Maximum number of matches returned (``None`` means 20).
        offset: Number of leading matches to skip.
        include_hidden: Include dot-paths; this also disables ``.hermesignore``.

    Returns:
        A dict with ``success`` and either ``error`` or the page of
        ``matches`` plus ``count`` / ``total_count``.
    """
    cfg = _ensure_config(config)
    if not _workspace_enabled(cfg):
        return {"success": False, "error": "Workspace is disabled in config."}
    # "not query" also covers None, which would otherwise fail on .strip().
    if not query or not query.strip():
        return {"success": False, "error": "Query cannot be empty."}

    paths = get_workspace_paths(cfg, ensure=True)
    base, error = _resolve_base(paths.workspace_root, relative_path)
    if base is None:
        return {"success": False, "error": error}

    try:
        regex = re.compile(query)
    except re.error as e:
        return {"success": False, "error": f"Invalid regex: {e}"}

    patterns = _load_ignore_patterns(paths.workspace_root, include_hidden=include_hidden)
    matches: list[dict[str, Any]] = []
    for file_path, rel in _walk_visible_files(
        base, paths.workspace_root, patterns, recursive=True, include_hidden=include_hidden
    ):
        if file_glob and not fnmatch.fnmatch(file_path.name, file_glob):
            continue
        if _is_probably_binary(file_path):
            continue
        try:
            text = file_path.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            continue
        rel_posix = rel.as_posix()
        matches.extend(
            {
                "relative_path": rel_posix,
                "path": str(file_path),
                "line": line_number,
                "content": line,
            }
            for line_number, line in enumerate(text.splitlines(), start=1)
            if regex.search(line)
        )

    sliced = _paginate(matches, limit, offset, default_limit=20)
    return {
        "success": True,
        "query": query,
        "workspace_root": str(paths.workspace_root),
        "count": len(sliced),
        "total_count": len(matches),
        "matches": sliced,
    }
"Inspect, index, list, or search the Hermes workspace", "/cron": "Manage scheduled tasks (list, add, remove)", "/reload-mcp": "Reload MCP servers from config.yaml", }, diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 492d00aecd7..af7f823af59 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -83,7 +83,7 @@ def ensure_hermes_home(): home = get_hermes_home() home.mkdir(parents=True, exist_ok=True) _secure_dir(home) - for subdir in ("cron", "sessions", "logs", "memories"): + for subdir in ("cron", "sessions", "logs", "memories", "workspace", "knowledgebase"): d = home / subdir d.mkdir(parents=True, exist_ok=True) _secure_dir(d) @@ -249,6 +249,52 @@ DEFAULT_CONFIG = { # injected at the start of every API call for few-shot priming. # Never saved to sessions, logs, or trajectories. "prefill_messages_file": "", + + "workspace": { + "enabled": True, + "path": "", # Empty = HERMES_HOME/workspace + "auto_create": True, + "persist_gateway_uploads": "ask", # off | ask | always + }, + + "knowledgebase": { + "enabled": True, + "path": "", # Empty = HERMES_HOME/knowledgebase + "roots": [], # Empty = [workspace path] + "retrieval_mode": "off", # off | gated | always + "auto_index": True, + "watch_for_changes": False, + "max_injected_chunks": 6, + "max_injected_tokens": 3200, + "dense_top_k": 40, + "sparse_top_k": 40, + "fused_top_k": 30, + "final_top_k": 8, + "min_fused_score": 0.0, + "injection_format": "sourced_note", + "chunking": { + "default_tokens": 512, + "overlap_tokens": 80, + "code_strategy": "structural", + "markdown_strategy": "headings", + }, + "embeddings": { + "provider": "local", + "model": "embeddinggemma-300m", + "dimensions": 768, + }, + "reranker": { + "enabled": False, + "provider": "local", + "model": "bge-reranker-v2-m3", + }, + "indexing": { + "respect_gitignore": True, + "respect_hermesignore": True, + "include_hidden": False, + "max_file_mb": 10, + }, + }, # Honcho AI-native memory -- reads ~/.honcho/config.json as single source 
of truth. # This section is only needed for hermes-specific overrides; everything else @@ -284,7 +330,7 @@ DEFAULT_CONFIG = { }, # Config schema version - bump this when adding new required fields - "_config_version": 7, + "_config_version": 8, } # ============================================================================= diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 6adf4ff709d..43bd83f0d34 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -2756,6 +2756,35 @@ For more help on a command: skills_parser.set_defaults(func=cmd_skills) + # ========================================================================= + # workspace command + # ========================================================================= + workspace_parser = subparsers.add_parser( + "workspace", + help="Inspect and search the Hermes workspace", + description="Inspect workspace status, rebuild the manifest, list files, or search within the Hermes workspace.", + ) + workspace_subparsers = workspace_parser.add_subparsers(dest="workspace_action") + workspace_subparsers.add_parser("status", help="Show workspace roots, manifest path, and file counts") + workspace_subparsers.add_parser("index", help="Rebuild the workspace manifest") + workspace_list = workspace_subparsers.add_parser("list", help="List files in the workspace") + workspace_list.add_argument("path", nargs="?", default="", help="Optional subpath within the workspace") + workspace_list.add_argument("--shallow", action="store_false", dest="recursive", default=True, help="Only list the immediate directory") + workspace_list.add_argument("--limit", type=int, default=20, help="Maximum files to show") + workspace_list.add_argument("--offset", type=int, default=0, help="Skip the first N files") + workspace_search = workspace_subparsers.add_parser("search", help="Search text content inside workspace files") + workspace_search.add_argument("query", help="Regex query to search for") + workspace_search.add_argument("--path", 
"""Console front-ends for the workspace commands (``hermes workspace`` and ``/workspace``)."""

from __future__ import annotations

from typing import Optional

from rich.console import Console
from rich.markup import escape

from agent.workspace import build_workspace_manifest, workspace_list, workspace_search, workspace_status
from hermes_cli.config import load_config


def _console(console: Optional[Console]) -> Console:
    """Return *console*, or a fresh ``Console`` when none was supplied."""
    return console or Console()


def _print_error(console: Console, message: object) -> None:
    """Print *message* in bold red.

    The message is escaped so brackets inside it (paths, regex errors) are
    shown literally instead of being parsed as Rich markup.
    """
    console.print(f"[bold red]{escape(str(message))}[/]")


def _print_status(console: Console) -> None:
    """Print workspace roots, manifest path, file count and per-category counts."""
    data = workspace_status(load_config())
    if not data.get("success"):
        _print_error(console, data.get("error", "Workspace unavailable"))
        return
    console.print(f"Workspace root: {escape(str(data['workspace_root']))}")
    console.print(f"Knowledgebase root: {escape(str(data['knowledgebase_root']))}")
    console.print(f"Manifest: {escape(str(data['manifest_path']))}")
    console.print(f"Files: {data['file_count']}")
    counts = data.get("category_counts") or {}
    for key in sorted(counts):
        console.print(f"  {escape(str(key))}: {counts[key]}")


def _print_index(console: Console) -> None:
    """Rebuild the workspace manifest and report the number of indexed files."""
    data = build_workspace_manifest(load_config())
    if not data.get("success"):
        _print_error(console, data.get("error", "Index failed"))
        return
    console.print(f"Indexed {data['file_count']} files")
    console.print(f"Manifest: {escape(str(data['manifest_path']))}")


def _print_list(console: Console, path: str = "", recursive: bool = True, limit: int = 20, offset: int = 0) -> None:
    """Print one page of workspace file paths, followed by a paging note if truncated."""
    data = workspace_list(load_config(), relative_path=path, recursive=recursive, limit=limit, offset=offset)
    if not data.get("success"):
        _print_error(console, data.get("error", "List failed"))
        return
    entries = data.get("entries") or []
    if not entries:
        console.print("No workspace files found.")
        return
    for entry in entries:
        # File names may legitimately contain "[...]"; keep them literal.
        console.print(escape(entry["relative_path"]))
    if data.get("total_count", len(entries)) > len(entries):
        console.print(f"[dim]Showing {len(entries)} of {data['total_count']} files[/]")


def _print_search(console: Console, query: str, path: str = "", file_glob: str | None = None, limit: int = 10, offset: int = 0) -> None:
    """Print one page of search hits as ``path:line content``."""
    data = workspace_search(query, load_config(), relative_path=path, file_glob=file_glob, limit=limit, offset=offset)
    if not data.get("success"):
        _print_error(console, data.get("error", "Search failed"))
        return
    matches = data.get("matches") or []
    if not matches:
        console.print("No matches found.")
        return
    for match in matches:
        # Matched lines are arbitrary file content; unescaped, text such as
        # "[/]" or "[bold]" would be interpreted (or rejected) as markup.
        location = f"{escape(match['relative_path'])}:{match['line']}"
        console.print(f"{location} {escape(match['content'])}")
    if data.get("total_count", len(matches)) > len(matches):
        console.print(f"[dim]Showing {len(matches)} of {data['total_count']} matches[/]")


def workspace_command(args, console: Optional[Console] = None) -> None:
    """Dispatch the ``hermes workspace`` sub-command described by *args*.

    *args* is read with ``getattr`` for ``workspace_action`` (defaulting to
    ``"status"``) and the per-action options ``path``, ``recursive``,
    ``limit``, ``offset``, ``query`` and ``file_glob``.
    """
    console = _console(console)
    action = getattr(args, "workspace_action", None) or "status"

    if action == "status":
        _print_status(console)
        return
    if action == "index":
        _print_index(console)
        return
    if action == "list":
        _print_list(
            console,
            path=getattr(args, "path", "") or "",
            recursive=getattr(args, "recursive", True),
            limit=getattr(args, "limit", 20),
            offset=getattr(args, "offset", 0),
        )
        return
    if action == "search":
        query = getattr(args, "query", "") or ""
        if not query.strip():
            console.print(escape("Usage: hermes workspace search <query>"))
            return
        _print_search(
            console,
            query=query,
            path=getattr(args, "path", "") or "",
            file_glob=getattr(args, "file_glob", None),
            limit=getattr(args, "limit", 10),
            offset=getattr(args, "offset", 0),
        )
        return

    _print_error(console, f"Unknown workspace action: {action}")


def handle_workspace_slash(cmd: str, console: Optional[Console] = None) -> None:
    """Handle the interactive ``/workspace`` slash command.

    Supported forms: ``/workspace``, ``/workspace status|path``,
    ``/workspace index``, ``/workspace list [path]`` and
    ``/workspace search <query>``. Action names are case-insensitive.
    """
    console = _console(console)

    remainder = cmd.strip()
    head = remainder.split(maxsplit=1)
    if head and head[0].lower() == "/workspace":
        remainder = head[1] if len(head) > 1 else ""

    # Split off only the action so the rest (e.g. a regex containing runs of
    # spaces) reaches the handler exactly as typed.
    tokens = remainder.split(maxsplit=1)
    if not tokens:
        _print_status(console)
        return

    action = tokens[0].lower()
    argument = tokens[1].strip() if len(tokens) > 1 else ""

    if action in {"status", "path"}:
        _print_status(console)
        return
    if action == "index":
        _print_index(console)
        return
    if action == "list":
        # Only the first token is used as the path; extra tokens are ignored.
        path = argument.split()[0] if argument else ""
        _print_list(console, path=path)
        return
    if action == "search":
        if not argument:
            console.print(escape("Usage: /workspace search <query>"))
            return
        _print_search(console, query=argument)
        return

    # Escaped: otherwise "[path]" would be consumed as a Rich style tag.
    console.print(escape("Usage: /workspace [status|index|list [path]|search <query>]"))
"path": str(tmp_path / "knowledgebase"), + "roots": [], + "retrieval_mode": "off", + "auto_index": True, + "watch_for_changes": False, + "max_injected_chunks": 6, + "max_injected_tokens": 3200, + "dense_top_k": 40, + "sparse_top_k": 40, + "fused_top_k": 30, + "final_top_k": 8, + "min_fused_score": 0.0, + "injection_format": "sourced_note", + "chunking": { + "default_tokens": 512, + "overlap_tokens": 80, + "code_strategy": "structural", + "markdown_strategy": "headings", + }, + "embeddings": { + "provider": "local", + "model": "embeddinggemma-300m", + "dimensions": 768, + }, + "reranker": { + "enabled": False, + "provider": "local", + "model": "bge-reranker-v2-m3", + }, + "indexing": { + "respect_gitignore": True, + "respect_hermesignore": True, + "include_hidden": False, + "max_file_mb": 10, + }, + }, + } + + +class TestWorkspacePaths: + def test_get_workspace_paths_creates_expected_directories(self, tmp_path): + from agent.workspace import get_workspace_paths + + paths = get_workspace_paths(_config(tmp_path), ensure=True) + + assert paths.workspace_root == tmp_path / "workspace" + assert paths.knowledgebase_root == tmp_path / "knowledgebase" + for subdir in ("docs", "notes", "data", "code", "uploads", "media"): + assert (paths.workspace_root / subdir).is_dir() + assert paths.indexes_dir.is_dir() + assert paths.manifests_dir.is_dir() + assert paths.cache_dir.is_dir() + + +class TestWorkspaceManifest: + def test_build_workspace_manifest_writes_summary(self, tmp_path): + from agent.workspace import build_workspace_manifest + + cfg = _config(tmp_path) + workspace = Path(cfg["workspace"]["path"]) + (workspace / "docs").mkdir(parents=True) + (workspace / "notes").mkdir(parents=True) + (workspace / "docs" / "a.md").write_text("alpha\n", encoding="utf-8") + (workspace / "notes" / "b.txt").write_text("beta\n", encoding="utf-8") + + manifest = build_workspace_manifest(cfg) + + assert manifest["success"] is True + assert manifest["file_count"] == 2 + assert 
manifest["manifest_path"].endswith("workspace.json") + assert Path(manifest["manifest_path"]).exists() + paths = {entry["relative_path"] for entry in manifest["files"]} + assert paths == {"docs/a.md", "notes/b.txt"} + + saved = json.loads(Path(manifest["manifest_path"]).read_text(encoding="utf-8")) + assert saved["file_count"] == 2 + + +class TestWorkspaceSearch: + def test_workspace_search_finds_text_matches_and_respects_ignore(self, tmp_path): + from agent.workspace import workspace_search + + cfg = _config(tmp_path) + workspace = Path(cfg["workspace"]["path"]) + (workspace / "docs").mkdir(parents=True) + (workspace / "docs" / "keep.md").write_text("Hermes likes retrieval\n", encoding="utf-8") + (workspace / "docs" / "skip.md").write_text("Hermes hidden\n", encoding="utf-8") + (workspace / ".hermesignore").write_text("docs/skip.md\n", encoding="utf-8") + (workspace / "docs" / "blob.bin").write_bytes(b"\x00\x01\x02Hermes") + + result = workspace_search("Hermes", config=cfg) + + assert result["success"] is True + assert result["count"] == 1 + match = result["matches"][0] + assert match["relative_path"] == "docs/keep.md" + assert match["line"] == 1 + + def test_workspace_search_supports_file_glob(self, tmp_path): + from agent.workspace import workspace_search + + cfg = _config(tmp_path) + workspace = Path(cfg["workspace"]["path"]) + (workspace / "docs").mkdir(parents=True) + (workspace / "docs" / "a.md").write_text("deploy target\n", encoding="utf-8") + (workspace / "docs" / "a.txt").write_text("deploy target\n", encoding="utf-8") + + result = workspace_search("deploy", config=cfg, file_glob="*.md") + + assert result["success"] is True + assert result["count"] == 1 + assert result["matches"][0]["relative_path"] == "docs/a.md" diff --git a/tests/hermes_cli/test_commands.py b/tests/hermes_cli/test_commands.py index 218059434ae..58aad5f03f7 100644 --- a/tests/hermes_cli/test_commands.py +++ b/tests/hermes_cli/test_commands.py @@ -10,7 +10,7 @@ from hermes_cli.commands 
import COMMANDS, SlashCommandCompleter EXPECTED_COMMANDS = { "/help", "/tools", "/toolsets", "/model", "/provider", "/prompt", "/personality", "/clear", "/history", "/new", "/reset", "/retry", - "/undo", "/save", "/config", "/cron", "/skills", "/platforms", + "/undo", "/save", "/config", "/cron", "/skills", "/workspace", "/platforms", "/verbose", "/reasoning", "/compress", "/title", "/usage", "/insights", "/paste", "/reload-mcp", "/rollback", "/background", "/skin", "/voice", "/quit", } diff --git a/tests/test_file_permissions.py b/tests/test_file_permissions.py index cc816f6fa85..8825ce906ed 100644 --- a/tests/test_file_permissions.py +++ b/tests/test_file_permissions.py @@ -114,7 +114,7 @@ class TestConfigFilePermissions(unittest.TestCase): home_mode = stat.S_IMODE(os.stat(home).st_mode) self.assertEqual(home_mode, 0o700) - for subdir in ("cron", "sessions", "logs", "memories"): + for subdir in ("cron", "sessions", "logs", "memories", "workspace", "knowledgebase"): subdir_mode = stat.S_IMODE(os.stat(home / subdir).st_mode) self.assertEqual(subdir_mode, 0o700, f"{subdir} should be 0700") diff --git a/tests/test_toolsets.py b/tests/test_toolsets.py index 13c34507028..bc79c216d4a 100644 --- a/tests/test_toolsets.py +++ b/tests/test_toolsets.py @@ -141,3 +141,6 @@ class TestToolsetConsistency: # All platform toolsets should be identical for ts in tool_sets[1:]: assert ts == tool_sets[0] + + def test_workspace_tool_is_exposed_in_hermes_cli(self): + assert "workspace" in resolve_toolset("hermes-cli") diff --git a/tests/test_workspace_cli_command.py b/tests/test_workspace_cli_command.py new file mode 100644 index 00000000000..9061cbab96b --- /dev/null +++ b/tests/test_workspace_cli_command.py @@ -0,0 +1,22 @@ +from unittest.mock import MagicMock, patch + + +class TestWorkspaceCLICommand: + def _make_cli(self): + from cli import HermesCLI + + cli = HermesCLI.__new__(HermesCLI) + cli.config = {"quick_commands": {}} + cli.console = MagicMock() + cli.agent = None + 
cli.conversation_history = [] + return cli + + def test_process_command_dispatches_workspace_handler(self): + cli = self._make_cli() + + with patch.object(cli, "_handle_workspace_command") as handler: + result = cli.process_command("/workspace status") + + assert result is True + handler.assert_called_once_with("/workspace status") diff --git a/tests/tools/test_workspace_tool.py b/tests/tools/test_workspace_tool.py new file mode 100644 index 00000000000..3aba016fad7 --- /dev/null +++ b/tests/tools/test_workspace_tool.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +import json +from pathlib import Path + + +def _config(tmp_path: Path) -> dict: + return { + "workspace": { + "enabled": True, + "path": str(tmp_path / "workspace"), + "auto_create": True, + "persist_gateway_uploads": "ask", + }, + "knowledgebase": { + "enabled": True, + "path": str(tmp_path / "knowledgebase"), + "roots": [], + "retrieval_mode": "off", + "auto_index": True, + "watch_for_changes": False, + "max_injected_chunks": 6, + "max_injected_tokens": 3200, + "dense_top_k": 40, + "sparse_top_k": 40, + "fused_top_k": 30, + "final_top_k": 8, + "min_fused_score": 0.0, + "injection_format": "sourced_note", + "chunking": { + "default_tokens": 512, + "overlap_tokens": 80, + "code_strategy": "structural", + "markdown_strategy": "headings", + }, + "embeddings": {"provider": "local", "model": "embeddinggemma-300m", "dimensions": 768}, + "reranker": {"enabled": False, "provider": "local", "model": "bge-reranker-v2-m3"}, + "indexing": { + "respect_gitignore": True, + "respect_hermesignore": True, + "include_hidden": False, + "max_file_mb": 10, + }, + }, + } + + +class TestWorkspaceTool: + def test_status_reports_workspace_roots(self, tmp_path, monkeypatch): + from tools.workspace_tool import workspace_tool + + monkeypatch.setattr("tools.workspace_tool.load_config", lambda: _config(tmp_path)) + + result = json.loads(workspace_tool(action="status")) + + assert result["success"] is True + assert 
#!/usr/bin/env python3
"""Workspace tool — inspect and search the Hermes workspace."""

from __future__ import annotations

import json
from typing import Any

from agent.workspace import build_workspace_manifest, workspace_list, workspace_search, workspace_status
from hermes_cli.config import load_config
from tools.registry import registry


# JSON schema advertised to the model for the ``workspace`` tool.
WORKSPACE_SCHEMA = {
    "name": "workspace",
    "description": "Manage the Hermes workspace under HERMES_HOME. Use this to inspect workspace status, rebuild the workspace manifest, list files, or search within workspace documents without relying on the terminal environment.",
    "parameters": {
        "type": "object",
        "properties": {
            "action": {
                "type": "string",
                "enum": ["status", "index", "list", "search"],
                "description": "What to do: status shows roots and counts, index rebuilds the manifest, list enumerates files, search searches text content.",
            },
            "query": {
                "type": "string",
                "description": "Regex query to search for when action='search'.",
            },
            "path": {
                "type": "string",
                "description": "Optional subpath within the workspace to scope list/search operations.",
            },
            "file_glob": {
                "type": "string",
                "description": "Optional filename glob filter for search, e.g. '*.md'.",
            },
            "limit": {
                "type": "integer",
                "description": "Maximum number of entries or matches to return.",
                "default": 20,
            },
            "offset": {
                "type": "integer",
                "description": "Skip the first N entries or matches.",
                "default": 0,
            },
            "recursive": {
                "type": "boolean",
                "description": "When action='list', recurse through subdirectories (default true).",
                "default": True,
            },
        },
        "required": ["action"],
    },
}


def _as_count(value: Any, default: int) -> int:
    """Coerce a tool argument to a non-negative int.

    ``None`` (an explicit JSON null) yields *default*; numeric strings are
    accepted. Values ``int()`` cannot convert raise ``TypeError`` or
    ``ValueError``, which ``workspace_tool`` reports as an error result.
    """
    if value is None:
        return default
    return max(0, int(value))


def workspace_tool(
    action: str,
    query: str = "",
    path: str = "",
    file_glob: str | None = None,
    limit: int = 20,
    offset: int = 0,
    recursive: bool = True,
) -> str:
    """Run a workspace action and return its result as a JSON string.

    Args:
        action: One of ``status``, ``index``, ``list`` or ``search``.
        query: Regex for ``search``.
        path: Optional sub-path scoping ``list`` / ``search``.
        file_glob: Optional file-name glob for ``search``.
        limit: Maximum entries/matches returned (``None`` means 20).
        offset: Entries/matches to skip (``None`` means 0).
        recursive: For ``list``, descend into sub-directories (``None`` means true).

    Returns:
        JSON text of the result dict. Any exception is reported as
        ``{"success": false, "error": ...}`` rather than raised.
    """
    try:
        config = load_config()

        # Optional arguments may arrive as explicit nulls from the model;
        # normalise them so the workspace helpers see well-typed values.
        query = query or ""
        path = path or ""
        file_glob = file_glob or None
        limit = _as_count(limit, 20)
        offset = _as_count(offset, 0)
        recursive = True if recursive is None else bool(recursive)

        if action == "status":
            result: dict[str, Any] = workspace_status(config)
        elif action == "index":
            result = build_workspace_manifest(config)
        elif action == "list":
            result = workspace_list(
                config=config,
                relative_path=path,
                recursive=recursive,
                limit=limit,
                offset=offset,
            )
        elif action == "search":
            result = workspace_search(
                query=query,
                config=config,
                relative_path=path,
                file_glob=file_glob,
                limit=limit,
                offset=offset,
            )
        else:
            result = {"success": False, "error": f"Unknown action: {action}"}
        return json.dumps(result, ensure_ascii=False)
    except Exception as e:  # pragma: no cover - defensive wrapper
        # Tool boundary: the caller expects a JSON string, never an exception.
        return json.dumps({"success": False, "error": str(e) or type(e).__name__}, ensure_ascii=False)


registry.register(
    name="workspace",
    toolset="workspace",
    schema=WORKSPACE_SCHEMA,
    handler=lambda args, **kw: workspace_tool(
        action=args.get("action", ""),
        query=args.get("query", ""),
        path=args.get("path", ""),
        file_glob=args.get("file_glob"),
        limit=args.get("limit", 20),
        offset=args.get("offset", 0),
        recursive=args.get("recursive", True),
    ),
    check_fn=lambda: True,
)