From 93bc3fc18a5908d94ce82d7c8fa92ce4b96c0149 Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Wed, 15 Apr 2026 15:06:09 +0000 Subject: [PATCH] fix: add directory exclusions for scan performance (#170) --- scripts/automation_opportunity_finder.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/scripts/automation_opportunity_finder.py b/scripts/automation_opportunity_finder.py index dbacc3e..677d943 100644 --- a/scripts/automation_opportunity_finder.py +++ b/scripts/automation_opportunity_finder.py @@ -56,6 +56,15 @@ SHELL_COMMAND_PATTERNS = [ "pip install", "npm install", "cargo build", ] +# Directories to skip during scans — large/uninteresting trees +EXCLUDE_DIRS = frozenset({ + "node_modules", "venv", ".venv", "__pycache__", ".git", + "site-packages", "dist", "build", ".tox", ".mypy_cache", + ".pytest_cache", "coverage", ".next", "vendor", + "skills", # hermes skills dir is huge + "audio_cache", "skins", "profiles", +}) + # Session tool calls that appear repeatedly — candidates for workflow automation TOOL_SEQUENCE_MIN_OCCURRENCES = 3 @@ -169,10 +178,11 @@ def analyze_documents(root_dirs: List[str]) -> List[Dict[str, Any]]: continue if path.suffix not in doc_extensions: continue - # Skip hidden dirs and common non-docs - if any(part.startswith(".") for part in path.parts): + # Skip excluded dirs and hidden dirs + parts = path.relative_to(root).parts if root in path.parents or root == path.parent else path.parts + if any(p.startswith(".") or p in EXCLUDE_DIRS for p in parts): continue - if "node_modules" in str(path) or "venv" in str(path): + if len(parts) > 8: continue try: @@ -230,9 +240,10 @@ def analyze_scripts(root_dirs: List[str]) -> List[Dict[str, Any]]: continue if path.suffix not in script_extensions: continue - if any(part.startswith(".") for part in path.parts): + parts = path.relative_to(root).parts if root in path.parents or root == path.parent else path.parts + if any(p.startswith(".") or p in EXCLUDE_DIRS for p in parts): continue - if "node_modules" in str(path) or "venv" in str(path) or "__pycache__" in str(path): + if len(parts) > 8: continue try: