fix: exclude hidden directories from find/grep search backends (#1558)

The primary injection vector in #1558 was search_files discovering
catalog cache files in .hub/index-cache/ via find or grep, which,
unlike ripgrep, do not skip hidden directories by default.

Three-layer fix:

1. _search_files (find): add -not -path '*/.*' to exclude hidden
   files and everything under hidden directories, matching ripgrep's
   default behavior.

2. _search_with_grep: add --exclude-dir='.*' to skip hidden
   directories in the grep fallback path.

3. _write_index_cache: write a .ignore file to .hub/ so ripgrep
   also skips it even when invoked with --hidden (belt-and-suspenders).

This makes all three search backends (rg, grep, find) consistently
exclude hidden directories, preventing the agent from discovering
and reading unvetted community content in hub cache files.
This commit is contained in:
teknium1
2026-03-17 01:59:45 -07:00
parent 40e2f8d9f0
commit 7d91b436e4
3 changed files with 190 additions and 2 deletions

View File

@@ -854,17 +854,22 @@ class ShellFileOperations(FileOperations):
else:
search_pattern = pattern.split('/')[-1]
# Exclude hidden directories (matching ripgrep's default behavior).
# This prevents the agent from discovering internal cache files
# (e.g. .hub/index-cache/) that may contain unvetted content.
hidden_exclude = "-not -path '*/.*'"
# Use find with modification time sorting
# -printf '%T@ %p\n' outputs: timestamp path
# sort -rn sorts by timestamp descending (newest first)
cmd = f"find {self._escape_shell_arg(path)} -type f -name {self._escape_shell_arg(search_pattern)} " \
cmd = f"find {self._escape_shell_arg(path)} {hidden_exclude} -type f -name {self._escape_shell_arg(search_pattern)} " \
f"-printf '%T@ %p\\n' 2>/dev/null | sort -rn | tail -n +{offset + 1} | head -n {limit}"
result = self._exec(cmd, timeout=60)
if not result.stdout.strip():
# Try without -printf (BSD find compatibility -- macOS)
cmd_simple = f"find {self._escape_shell_arg(path)} -type f -name {self._escape_shell_arg(search_pattern)} " \
cmd_simple = f"find {self._escape_shell_arg(path)} {hidden_exclude} -type f -name {self._escape_shell_arg(search_pattern)} " \
f"2>/dev/null | head -n {limit + offset} | tail -n +{offset + 1}"
result = self._exec(cmd_simple, timeout=60)
@@ -1005,6 +1010,10 @@ class ShellFileOperations(FileOperations):
"""Fallback search using grep."""
cmd_parts = ["grep", "-rnH"] # -H forces filename even for single-file searches
# Exclude hidden directories (matching ripgrep's default behavior).
# This prevents searching inside .hub/index-cache/, .git/, etc.
cmd_parts.append("--exclude-dir='.*'")
# Add context if requested
if context > 0:
cmd_parts.extend(["-C", str(context)])