mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
feat: improve file search UX — fuzzy @ completions, mtime sorting, better suggestions (#9467)
Three improvements to file search based on user feedback:
1. Fuzzy @ completions (commands.py):
- Bare @query now does project-wide fuzzy file search instead of
prefix-only directory listing
- Uses rg --files with 5-second cache for responsive completions
- Scoring: exact name (100) > prefix (80) > substring (60) >
path contains (40) > subsequence with boundary bonus (35/25)
- Bare @ with no query shows recently modified files first
2. Mtime-sorted file search (file_operations.py):
- _search_files_rg now uses --sortr=modified (rg 13+) to surface
recently edited files first
- Falls back to unsorted on older rg versions
3. Improved file-not-found suggestions (file_operations.py):
- Replaced crude character-set overlap with ranked scoring:
same basename (90) > prefix (70) > substring (60) >
reverse substring (40) > same extension (30)
- search_files path-not-found now suggests similar directories
from the parent
This commit is contained in:
@@ -12,6 +12,9 @@ from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import time
|
||||
from collections.abc import Callable, Mapping
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
@@ -610,6 +613,10 @@ class SlashCommandCompleter(Completer):
|
||||
) -> None:
|
||||
self._skill_commands_provider = skill_commands_provider
|
||||
self._command_filter = command_filter
|
||||
# Cached project file list for fuzzy @ completions
|
||||
self._file_cache: list[str] = []
|
||||
self._file_cache_time: float = 0.0
|
||||
self._file_cache_cwd: str = ""
|
||||
|
||||
def _command_allowed(self, slash_command: str) -> bool:
|
||||
if self._command_filter is None:
|
||||
@@ -794,46 +801,138 @@ class SlashCommandCompleter(Completer):
|
||||
count += 1
|
||||
return
|
||||
|
||||
# Bare @ or @partial — show matching files/folders from cwd
|
||||
# Bare @ or @partial — fuzzy project-wide file search
|
||||
query = word[1:] # strip the @
|
||||
if not query:
|
||||
search_dir, match_prefix = ".", ""
|
||||
else:
|
||||
expanded = os.path.expanduser(query)
|
||||
if expanded.endswith("/"):
|
||||
search_dir, match_prefix = expanded, ""
|
||||
else:
|
||||
search_dir = os.path.dirname(expanded) or "."
|
||||
match_prefix = os.path.basename(expanded)
|
||||
yield from self._fuzzy_file_completions(word, query, limit)
|
||||
|
||||
try:
|
||||
entries = os.listdir(search_dir)
|
||||
except OSError:
|
||||
def _get_project_files(self) -> list[str]:
    """Return the cached list of project files, refreshed at most every 5s.

    The listing is produced by the first available tool that succeeds:
    ``rg --files --sortr=modified`` (most recently modified first), then
    plain ``rg --files`` (for rg builds where the sorted call fails), then
    ``fd --type f``.  Paths are stored relative to the current working
    directory and the listing is capped at 5000 entries.

    Returns:
        Relative file paths; an empty list when no tool is installed,
        every tool failed or timed out, or the project has no files.
    """
    cwd = os.getcwd()
    now = time.monotonic()
    # An empty listing is a valid cached answer.  Requiring a non-empty
    # cache here would re-spawn up to three subprocesses (2s timeout each)
    # on every completion request whenever the listing comes back empty.
    # The initial ``_file_cache_cwd`` of "" never equals a real cwd, so the
    # first call always refreshes.
    if self._file_cache_cwd == cwd and now - self._file_cache_time < 5.0:
        return self._file_cache

    files: list[str] = []
    candidate_cmds = (
        ["rg", "--files", "--sortr=modified", cwd],
        ["rg", "--files", cwd],
        ["fd", "--type", "f", "--base-directory", cwd],
    )
    for cmd in candidate_cmds:
        if not shutil.which(cmd[0]):
            continue
        try:
            proc = subprocess.run(
                cmd, capture_output=True, text=True, timeout=2,
                cwd=cwd,
            )
        except (subprocess.TimeoutExpired, OSError):
            continue
        if proc.returncode != 0:
            continue
        # Split without strip() so names with leading/trailing whitespace
        # survive; blank lines are dropped instead.
        lines = [line for line in proc.stdout.split("\n") if line]
        if not lines:
            continue
        for p in lines[:5000]:
            # rg is given an absolute root and echoes absolute paths;
            # fd with --base-directory already prints relative ones.
            files.append(os.path.relpath(p, cwd) if os.path.isabs(p) else p)
        break

    self._file_cache = files
    self._file_cache_time = now
    self._file_cache_cwd = cwd
    return files
|
||||
|
||||
@staticmethod
def _score_path(filepath: str, query: str) -> int:
    """Score a file path against a fuzzy query. Higher = better match.

    Matching is case-insensitive and tiered:

    * 100 - name equals the query
    * 80  - name starts with the query
    * 60  - name contains the query
    * 40  - full path contains the query
    * 35  - query is a subsequence of the name and at least half of the
      matched characters follow a word boundary (``_ - . /`` or the start)
    * 25  - query is a subsequence of the name otherwise
    * 0   - no match

    An empty query scores 1 for every path so that everything is shown.
    A trailing ``/`` (directory entry) is ignored when extracting the name,
    so ``src/utils/`` is scored on ``utils`` rather than on an empty name.
    """
    if not query:
        return 1  # show everything when query is empty

    lower_q = query.lower()
    lower_path = filepath.lower()
    # basename("dir/") is "", which would hide directories from every
    # name-based tier; drop the trailing separator first.
    lower_file = os.path.basename(lower_path.rstrip("/"))

    if lower_file == lower_q:
        return 100
    if lower_file.startswith(lower_q):
        return 80
    if lower_q in lower_file:
        return 60
    if lower_q in lower_path:
        return 40

    # Abbreviation match, e.g. "fo" -> "file_operations": greedily consume
    # the query characters in order while counting how many of the matched
    # characters sit right after a word boundary.
    matched = 0
    boundary_hits = 0
    prev = "_"  # treat start of the name as a boundary
    for ch in lower_file:
        if matched < len(lower_q) and ch == lower_q[matched]:
            if prev in "_-./":
                boundary_hits += 1
            matched += 1
        prev = ch

    if matched < len(lower_q):
        return 0
    return 35 if boundary_hits >= len(lower_q) * 0.5 else 25
|
||||
|
||||
def _fuzzy_file_completions(self, word: str, query: str, limit: int = 20):
    """Yield fuzzy file completions for bare @query.

    Args:
        word: The token being completed, including the leading ``@``; its
            length determines how much text the completion replaces.
        query: ``word`` without the ``@``.  When empty, the first ``limit``
            entries of the project listing are offered in listing order
            (most recently modified first when the listing tool sorted it).
        limit: Maximum number of completions to yield.

    Yields:
        ``Completion`` objects whose text is ``@file:<path>`` or
        ``@folder:<path>`` (for entries ending in ``/``).
    """
    files = self._get_project_files()

    if query:
        # Keep only paths that match at all, best score first; ties are
        # broken alphabetically so the ordering is deterministic.
        scored = []
        for fp in files:
            s = self._score_path(fp, query)
            if s > 0:
                scored.append((s, fp))
        scored.sort(key=lambda x: (-x[0], x[1]))
        ranked = [fp for _, fp in scored[:limit]]
    else:
        ranked = files[:limit]

    cwd = os.getcwd()  # loop-invariant; resolve once for size lookups
    for fp in ranked:
        is_dir = fp.endswith("/")
        kind = "folder" if is_dir else "file"
        # basename("dir/") is "", so label directories by their own name.
        filename = os.path.basename(fp.rstrip("/")) + ("/" if is_dir else "")
        meta = "dir" if is_dir else _file_size_label(os.path.join(cwd, fp))
        if query:
            # Fuzzy hits can come from anywhere in the tree, so show the
            # full relative path next to the size.
            display_meta = f"{fp} {meta}" if meta else fp
        else:
            display_meta = meta
        yield Completion(
            f"@{kind}:{fp}",
            start_position=-len(word),
            display=filename,
            display_meta=display_meta,
        )
|
||||
|
||||
def _model_completions(self, sub_text: str, sub_lower: str):
|
||||
"""Yield completions for /model from config aliases + built-in aliases."""
|
||||
|
||||
@@ -556,27 +556,54 @@ class ShellFileOperations(FileOperations):
|
||||
|
||||
def _suggest_similar_files(self, path: str) -> ReadResult:
    """Suggest similar files when the requested file is not found.

    Lists up to 50 entries of the directory part of ``path`` (``.`` when
    there is none) and ranks each entry against the requested file name,
    case-insensitively:

    * 100 - identical name (should not happen, kept as a guard)
    * 90  - same base name, different extension (config.yml vs config.yaml)
    * 70  - one name is a prefix of the other
    * 60  - entry contains the requested name
    * 40  - requested name contains the entry (entries longer than 2 chars)
    * 30  - same extension and the shared character set covers at least
      40% of the longer name

    Returns:
        A ``ReadResult`` with ``error`` set to ``File not found: <path>``
        and ``similar_files`` holding the five best-scoring paths; ties
        keep the order in which ``ls`` listed them.
    """
    dir_path = os.path.dirname(path) or "."
    filename = os.path.basename(path)
    lower_name = filename.lower()
    # Lower-casing never moves the dot, so split the lowered name once
    # instead of re-deriving stem and extension for every candidate.
    lower_stem, ext = os.path.splitext(lower_name)

    # List files in the target directory
    ls_cmd = f"ls -1 {self._escape_shell_arg(dir_path)} 2>/dev/null | head -50"
    ls_result = self._exec(ls_cmd)

    scored: list = []  # (score, filepath) — higher is better
    if ls_result.exit_code == 0 and ls_result.stdout.strip():
        for f in ls_result.stdout.strip().split('\n'):
            if not f:
                continue
            lf = f.lower()
            cand_stem, cand_ext = os.path.splitext(lf)
            score = 0

            if lf == lower_name:
                score = 100
            elif cand_stem == lower_stem:
                score = 90
            elif lf.startswith(lower_name) or lower_name.startswith(lf):
                score = 70
            elif lower_name in lf:
                score = 60
            elif lf in lower_name and len(lf) > 2:
                score = 40
            elif ext and cand_ext == ext:
                common = set(lower_name) & set(lf)
                if len(common) >= max(len(lower_name), len(lf)) * 0.4:
                    score = 30

            if score > 0:
                scored.append((score, os.path.join(dir_path, f)))

    # list.sort is stable, so equal scores keep directory-listing order.
    scored.sort(key=lambda x: -x[0])
    similar = [fp for _, fp in scored[:5]]

    return ReadResult(
        error=f"File not found: {path}",
        similar_files=similar
    )
|
||||
|
||||
def read_file_raw(self, path: str) -> ReadResult:
|
||||
@@ -845,8 +872,33 @@ class ShellFileOperations(FileOperations):
|
||||
# Validate that the path exists before searching
|
||||
check = self._exec(f"test -e {self._escape_shell_arg(path)} && echo exists || echo not_found")
|
||||
if "not_found" in check.stdout:
|
||||
# Try to suggest nearby paths
|
||||
parent = os.path.dirname(path) or "."
|
||||
basename_query = os.path.basename(path)
|
||||
hint_parts = [f"Path not found: {path}"]
|
||||
# Check if parent directory exists and list similar entries
|
||||
parent_check = self._exec(
|
||||
f"test -d {self._escape_shell_arg(parent)} && echo yes || echo no"
|
||||
)
|
||||
if "yes" in parent_check.stdout and basename_query:
|
||||
ls_result = self._exec(
|
||||
f"ls -1 {self._escape_shell_arg(parent)} 2>/dev/null | head -20"
|
||||
)
|
||||
if ls_result.exit_code == 0 and ls_result.stdout.strip():
|
||||
lower_q = basename_query.lower()
|
||||
candidates = []
|
||||
for entry in ls_result.stdout.strip().split('\n'):
|
||||
if not entry:
|
||||
continue
|
||||
le = entry.lower()
|
||||
if lower_q in le or le in lower_q or le.startswith(lower_q[:3]):
|
||||
candidates.append(os.path.join(parent, entry))
|
||||
if candidates:
|
||||
hint_parts.append(
|
||||
"Similar paths: " + ", ".join(candidates[:5])
|
||||
)
|
||||
return SearchResult(
|
||||
error=f"Path not found: {path}. Verify the path exists (use 'terminal' to check).",
|
||||
error=". ".join(hint_parts),
|
||||
total_count=0
|
||||
)
|
||||
|
||||
@@ -912,7 +964,8 @@ class ShellFileOperations(FileOperations):
|
||||
|
||||
rg --files respects .gitignore and excludes hidden directories by
|
||||
default, and uses parallel directory traversal for ~200x speedup
|
||||
over find on wide trees.
|
||||
over find on wide trees. Results are sorted by modification time
|
||||
(most recently edited first) when rg >= 13.0 supports --sortr.
|
||||
"""
|
||||
# rg --files -g uses glob patterns; wrap bare names so they match
|
||||
# at any depth (equivalent to find -name).
|
||||
@@ -922,14 +975,25 @@ class ShellFileOperations(FileOperations):
|
||||
glob_pattern = pattern
|
||||
|
||||
fetch_limit = limit + offset
|
||||
cmd = (
|
||||
f"rg --files -g {self._escape_shell_arg(glob_pattern)} "
|
||||
# Try mtime-sorted first (rg 13+); fall back to unsorted if not supported.
|
||||
cmd_sorted = (
|
||||
f"rg --files --sortr=modified -g {self._escape_shell_arg(glob_pattern)} "
|
||||
f"{self._escape_shell_arg(path)} 2>/dev/null "
|
||||
f"| head -n {fetch_limit}"
|
||||
)
|
||||
result = self._exec(cmd, timeout=60)
|
||||
|
||||
result = self._exec(cmd_sorted, timeout=60)
|
||||
all_files = [f for f in result.stdout.strip().split('\n') if f]
|
||||
|
||||
if not all_files:
|
||||
# --sortr may have failed on older rg; retry without it.
|
||||
cmd_plain = (
|
||||
f"rg --files -g {self._escape_shell_arg(glob_pattern)} "
|
||||
f"{self._escape_shell_arg(path)} 2>/dev/null "
|
||||
f"| head -n {fetch_limit}"
|
||||
)
|
||||
result = self._exec(cmd_plain, timeout=60)
|
||||
all_files = [f for f in result.stdout.strip().split('\n') if f]
|
||||
|
||||
page = all_files[offset:offset + limit]
|
||||
|
||||
return SearchResult(
|
||||
|
||||
Reference in New Issue
Block a user