fix: add directory exclusions for scan performance (#170)

This commit is contained in:
2026-04-15 15:06:09 +00:00
parent 49365c64d2
commit 93bc3fc18a

View File

@@ -56,6 +56,15 @@ SHELL_COMMAND_PATTERNS = [
"pip install", "npm install", "cargo build",
]
# Directory names that scans skip outright: large or uninteresting trees.
EXCLUDE_DIRS = frozenset((
    # Conventional dependency / virtual-environment directory names
    "node_modules", "site-packages", "vendor", "venv", ".venv",
    # Conventional VCS metadata and tool cache directory names
    ".git", "__pycache__", ".tox", ".mypy_cache", ".pytest_cache",
    # Conventional build / report output directory names
    "dist", "build", "coverage", ".next",
    # Other large trees (the hermes skills dir is huge)
    "skills", "audio_cache", "skins", "profiles",
))
# Repeat threshold for session tool calls: sequences that appear repeatedly
# are treated as candidates for workflow automation.
# NOTE(review): the code that compares against this value is not visible
# here — confirm whether the threshold is inclusive (i.e. >= 3).
TOOL_SEQUENCE_MIN_OCCURRENCES = 3
@@ -169,10 +178,11 @@ def analyze_documents(root_dirs: List[str]) -> List[Dict[str, Any]]:
continue
if path.suffix not in doc_extensions:
continue
# Skip hidden dirs and common non-docs
if any(part.startswith(".") for part in path.parts):
# Skip excluded dirs and hidden dirs
parts = path.relative_to(root).parts if root in path.parents or root == path.parent else path.parts
if any(p.startswith(".") or p in EXCLUDE_DIRS for p in parts):
continue
if "node_modules" in str(path) or "venv" in str(path):
if len(parts) > 8:
continue
try:
@@ -230,9 +240,10 @@ def analyze_scripts(root_dirs: List[str]) -> List[Dict[str, Any]]:
continue
if path.suffix not in script_extensions:
continue
if any(part.startswith(".") for part in path.parts):
parts = path.relative_to(root).parts if root in path.parents or root == path.parent else path.parts
if any(p.startswith(".") or p in EXCLUDE_DIRS for p in parts):
continue
if "node_modules" in str(path) or "venv" in str(path) or "__pycache__" in str(path):
if len(parts) > 8:
continue
try: