feat: improve file search UX — fuzzy @ completions, mtime sorting, better suggestions (#9467)

Three improvements to file search based on user feedback:

1. Fuzzy @ completions (commands.py):
   - Bare @query now does project-wide fuzzy file search instead of prefix-only directory listing
   - Uses rg --files (falling back to fd) with a 5-second cache for responsive completions
   - Scoring: exact name (100) > prefix (80) > substring (60) > path contains (40) > subsequence (35 with boundary bonus, otherwise 25)
   - Bare @ with no query shows recently modified files first

2. Mtime-sorted file search (file_operations.py):
   - _search_files_rg now uses --sortr=modified (rg 13+) to surface recently edited files first
   - Falls back to unsorted on older rg versions

3. Improved file-not-found suggestions (file_operations.py):
   - Replaced crude character-set overlap with ranked scoring: same basename (90) > prefix (70) > substring (60) > reverse substring (40) > same extension (30)
   - search_files path-not-found now suggests similar entries from the parent directory
2026-04-28 06:51:16 +08:00 · 2026-04-13 23:54:45 -07:00
parent c7e2fe655a
commit eb44abd6b1
2 changed files with 218 additions and 55 deletions
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -12,6 +12,9 @@ from __future__ import annotations

 import os
 import re
+import shutil
+import subprocess
+import time
 from collections.abc import Callable, Mapping
 from dataclasses import dataclass
 from typing import Any
@@ -610,6 +613,10 @@ class SlashCommandCompleter(Completer):
    ) -> None:
        self._skill_commands_provider = skill_commands_provider
        self._command_filter = command_filter
+        # Cached project file list for fuzzy @ completions
+        self._file_cache: list[str] = []
+        self._file_cache_time: float = 0.0
+        self._file_cache_cwd: str = ""

    def _command_allowed(self, slash_command: str) -> bool:
        if self._command_filter is None:
@@ -794,46 +801,138 @@ class SlashCommandCompleter(Completer):
                    count += 1
                return

-        # Bare @ or @partial — show matching files/folders from cwd
+        # Bare @ or @partial — fuzzy project-wide file search
        query = word[1:]  # strip the @
-        if not query:
-            search_dir, match_prefix = ".", ""
-        else:
-            expanded = os.path.expanduser(query)
-            if expanded.endswith("/"):
-                search_dir, match_prefix = expanded, ""
-            else:
-                search_dir = os.path.dirname(expanded) or "."
-                match_prefix = os.path.basename(expanded)
+        yield from self._fuzzy_file_completions(word, query, limit)

-        try:
-            entries = os.listdir(search_dir)
-        except OSError:
+    def _get_project_files(self) -> list[str]:
+        """Return cached list of project files (refreshed every 5s)."""
+        cwd = os.getcwd()
+        now = time.monotonic()
+        if (
+            self._file_cache
+            and self._file_cache_cwd == cwd
+            and now - self._file_cache_time < 5.0
+        ):
+            return self._file_cache
+
+        files: list[str] = []
+        # Try rg first (fast, respects .gitignore; mtime-sorted when supported), then plain rg, then fd.
+        for cmd in [
+            ["rg", "--files", "--sortr=modified", cwd],
+            ["rg", "--files", cwd],
+            ["fd", "--type", "f", "--base-directory", cwd],
+        ]:
+            tool = cmd[0]
+            if not shutil.which(tool):
+                continue
+            try:
+                proc = subprocess.run(
+                    cmd, capture_output=True, text=True, timeout=2,
+                    cwd=cwd,
+                )
+                if proc.returncode == 0 and proc.stdout.strip():
+                    raw = proc.stdout.strip().split("\n")
+                    # Store relative paths
+                    for p in raw[:5000]:
+                        rel = os.path.relpath(p, cwd) if os.path.isabs(p) else p
+                        files.append(rel)
+                    break
+            except (subprocess.TimeoutExpired, OSError):
+                continue
+
+        self._file_cache = files
+        self._file_cache_time = now
+        self._file_cache_cwd = cwd
+        return files
+
+    @staticmethod
+    def _score_path(filepath: str, query: str) -> int:
+        """Score a file path against a fuzzy query. Higher = better match."""
+        if not query:
+            return 1  # show everything when query is empty
+
+        filename = os.path.basename(filepath)
+        lower_file = filename.lower()
+        lower_path = filepath.lower()
+        lower_q = query.lower()
+
+        # Exact filename match
+        if lower_file == lower_q:
+            return 100
+        # Filename starts with query
+        if lower_file.startswith(lower_q):
+            return 80
+        # Filename contains query as substring
+        if lower_q in lower_file:
+            return 60
+        # Full path contains query
+        if lower_q in lower_path:
+            return 40
+        # Initials / abbreviation match: e.g. "fo" matches "file_operations"
+        # Check if query chars appear in order in filename
+        qi = 0
+        for c in lower_file:
+            if qi < len(lower_q) and c == lower_q[qi]:
+                qi += 1
+        if qi == len(lower_q):
+            # Bonus if matches land on word boundaries (after _, -, /, .)
+            boundary_hits = 0
+            qi = 0
+            prev = "_"  # treat start as boundary
+            for c in lower_file:
+                if qi < len(lower_q) and c == lower_q[qi]:
+                    if prev in "_-./":
+                        boundary_hits += 1
+                    qi += 1
+                prev = c
+            if boundary_hits >= len(lower_q) * 0.5:
+                return 35
+            return 25
+        return 0
+
+    def _fuzzy_file_completions(self, word: str, query: str, limit: int = 20):
+        """Yield fuzzy file completions for bare @query."""
+        files = self._get_project_files()
+
+        if not query:
+            # No query — show the first cached files (most recently modified first when rg --sortr succeeded)
+            for fp in files[:limit]:
+                is_dir = fp.endswith("/")
+                filename = os.path.basename(fp)
+                kind = "folder" if is_dir else "file"
+                meta = "dir" if is_dir else _file_size_label(
+                    os.path.join(os.getcwd(), fp)
+                )
+                yield Completion(
+                    f"@{kind}:{fp}",
+                    start_position=-len(word),
+                    display=filename,
+                    display_meta=meta,
+                )
            return

-        count = 0
-        prefix_lower = match_prefix.lower()
-        for entry in sorted(entries):
-            if match_prefix and not entry.lower().startswith(prefix_lower):
-                continue
-            if entry.startswith("."):
-                continue  # skip hidden files in bare @ mode
-            if count >= limit:
-                break
-            full_path = os.path.join(search_dir, entry)
-            is_dir = os.path.isdir(full_path)
-            display_path = os.path.relpath(full_path)
-            suffix = "/" if is_dir else ""
+        # Score and rank
+        scored = []
+        for fp in files:
+            s = self._score_path(fp, query)
+            if s > 0:
+                scored.append((s, fp))
+        scored.sort(key=lambda x: (-x[0], x[1]))
+
+        for _, fp in scored[:limit]:
+            is_dir = fp.endswith("/")
+            filename = os.path.basename(fp)
            kind = "folder" if is_dir else "file"
-            meta = "dir" if is_dir else _file_size_label(full_path)
-            completion = f"@{kind}:{display_path}{suffix}"
-            yield Completion(
-                completion,
-                start_position=-len(word),
-                display=entry + suffix,
-                display_meta=meta,
+            meta = "dir" if is_dir else _file_size_label(
+                os.path.join(os.getcwd(), fp)
+            )
+            yield Completion(
+                f"@{kind}:{fp}",
+                start_position=-len(word),
+                display=filename,
+                display_meta=f"{fp}  {meta}" if meta else fp,
            )
-            count += 1

    def _model_completions(self, sub_text: str, sub_lower: str):
        """Yield completions for /model from config aliases + built-in aliases."""
--- a/tools/file_operations.py
+++ b/tools/file_operations.py
@@ -556,27 +556,54 @@ class ShellFileOperations(FileOperations):
    
    def _suggest_similar_files(self, path: str) -> ReadResult:
        """Suggest similar files when the requested file is not found."""
-        # Get directory and filename
        dir_path = os.path.dirname(path) or "."
        filename = os.path.basename(path)
-        
-        # List files in directory
-        ls_cmd = f"ls -1 {self._escape_shell_arg(dir_path)} 2>/dev/null | head -20"
+        basename_no_ext = os.path.splitext(filename)[0]
+        ext = os.path.splitext(filename)[1].lower()
+        lower_name = filename.lower()
+
+        # List files in the target directory
+        ls_cmd = f"ls -1 {self._escape_shell_arg(dir_path)} 2>/dev/null | head -50"
        ls_result = self._exec(ls_cmd)
-        
-        similar = []
+
+        scored: list = []  # (score, filepath) — higher is better
        if ls_result.exit_code == 0 and ls_result.stdout.strip():
-            files = ls_result.stdout.strip().split('\n')
-            # Simple similarity: files that share some characters with the target
-            for f in files:
-                # Check if filenames share significant overlap
-                common = set(filename.lower()) & set(f.lower())
-                if len(common) >= len(filename) * 0.5:  # 50% character overlap
-                    similar.append(os.path.join(dir_path, f))
-        
+            for f in ls_result.stdout.strip().split('\n'):
+                if not f:
+                    continue
+                lf = f.lower()
+                score = 0
+
+                # Case-insensitive exact match (e.g. only the letter case differs)
+                if lf == lower_name:
+                    score = 100
+                # Same base name, different extension (e.g. config.yml vs config.yaml)
+                elif os.path.splitext(f)[0].lower() == basename_no_ext.lower():
+                    score = 90
+                # Target is prefix of candidate or vice-versa
+                elif lf.startswith(lower_name) or lower_name.startswith(lf):
+                    score = 70
+                # Substring match (candidate contains query)
+                elif lower_name in lf:
+                    score = 60
+                # Reverse substring (query contains candidate name)
+                elif lf in lower_name and len(lf) > 2:
+                    score = 40
+                # Same extension with some overlap
+                elif ext and os.path.splitext(f)[1].lower() == ext:
+                    common = set(lower_name) & set(lf)
+                    if len(common) >= max(len(lower_name), len(lf)) * 0.4:
+                        score = 30
+
+                if score > 0:
+                    scored.append((score, os.path.join(dir_path, f)))
+
+        scored.sort(key=lambda x: -x[0])
+        similar = [fp for _, fp in scored[:5]]
+
        return ReadResult(
            error=f"File not found: {path}",
-            similar_files=similar[:5]  # Limit to 5 suggestions
+            similar_files=similar
        )
    
    def read_file_raw(self, path: str) -> ReadResult:
@@ -845,8 +872,33 @@ class ShellFileOperations(FileOperations):
        # Validate that the path exists before searching
        check = self._exec(f"test -e {self._escape_shell_arg(path)} && echo exists || echo not_found")
        if "not_found" in check.stdout:
+            # Try to suggest nearby paths
+            parent = os.path.dirname(path) or "."
+            basename_query = os.path.basename(path)
+            hint_parts = [f"Path not found: {path}"]
+            # Check if parent directory exists and list similar entries
+            parent_check = self._exec(
+                f"test -d {self._escape_shell_arg(parent)} && echo yes || echo no"
+            )
+            if "yes" in parent_check.stdout and basename_query:
+                ls_result = self._exec(
+                    f"ls -1 {self._escape_shell_arg(parent)} 2>/dev/null | head -20"
+                )
+                if ls_result.exit_code == 0 and ls_result.stdout.strip():
+                    lower_q = basename_query.lower()
+                    candidates = []
+                    for entry in ls_result.stdout.strip().split('\n'):
+                        if not entry:
+                            continue
+                        le = entry.lower()
+                        if lower_q in le or le in lower_q or le.startswith(lower_q[:3]):
+                            candidates.append(os.path.join(parent, entry))
+                    if candidates:
+                        hint_parts.append(
+                            "Similar paths: " + ", ".join(candidates[:5])
+                        )
            return SearchResult(
-                error=f"Path not found: {path}. Verify the path exists (use 'terminal' to check).",
+                error=". ".join(hint_parts),
                total_count=0
            )
        
@@ -912,7 +964,8 @@ class ShellFileOperations(FileOperations):

        rg --files respects .gitignore and excludes hidden directories by
        default, and uses parallel directory traversal for ~200x speedup
-        over find on wide trees.
+        over find on wide trees.  Results are sorted by modification time
+        (most recently edited first) when rg >= 13.0 supports --sortr.
        """
        # rg --files -g uses glob patterns; wrap bare names so they match
        # at any depth (equivalent to find -name).
@@ -922,14 +975,25 @@ class ShellFileOperations(FileOperations):
            glob_pattern = pattern

        fetch_limit = limit + offset
-        cmd = (
-            f"rg --files -g {self._escape_shell_arg(glob_pattern)} "
+        # Try mtime-sorted first (rg 13+); fall back to unsorted if not supported.
+        cmd_sorted = (
+            f"rg --files --sortr=modified -g {self._escape_shell_arg(glob_pattern)} "
            f"{self._escape_shell_arg(path)} 2>/dev/null "
            f"| head -n {fetch_limit}"
        )
-        result = self._exec(cmd, timeout=60)
-
+        result = self._exec(cmd_sorted, timeout=60)
        all_files = [f for f in result.stdout.strip().split('\n') if f]
+
+        if not all_files:
+            # --sortr may have failed on older rg; retry without it.
+            cmd_plain = (
+                f"rg --files -g {self._escape_shell_arg(glob_pattern)} "
+                f"{self._escape_shell_arg(path)} 2>/dev/null "
+                f"| head -n {fetch_limit}"
+            )
+            result = self._exec(cmd_plain, timeout=60)
+            all_files = [f for f in result.stdout.strip().split('\n') if f]
+
        page = all_files[offset:offset + limit]

        return SearchResult(