fix: parallelize skills browse/search to prevent hanging (#7301)

hermes skills browse ran all 7 source adapters serially with no overall
timeout and no progress indicator. On a cold cache, GitHubSource alone
could make 100+ sequential HTTP calls (directory listing + inspect per
skill per tap), taking 5+ minutes with no output — appearing to hang.

Changes:
- Add parallel_search_sources() in tools/skills_hub.py that runs all
  source adapters concurrently via ThreadPoolExecutor with a 30s
  overall timeout. Sources that finish in time contribute results;
  slow ones are skipped gracefully with a visible notice.
- Update unified_search() to use parallel_search_sources() internally.
- Update do_browse() and do_search() in hermes_cli/skills_hub.py to
  show a Rich spinner while fetching, so the user sees activity.
- Bump per-source limits (clawhub 50→500, lobehub 50→500, etc.) now
  that fetching is parallel — yields far more results per browse.
- Report timed-out sources and suggest re-running for cached results.
- Replace 'inspect/install' footer with 'search deeper' tip.

Worst-case latency drops from 5+ minutes (serial) to ~30s (parallel
with timeout cap). Result count should jump from ~242 to 1000+.
This commit is contained in:
Teknium
2026-04-10 12:54:18 -07:00
committed by GitHub
parent a093eb47f7
commit 7e28b7b5d5
2 changed files with 108 additions and 35 deletions

View File

@@ -151,7 +151,8 @@ def do_search(query: str, source: str = "all", limit: int = 10,
auth = GitHubAuth()
sources = create_source_router(auth)
results = unified_search(query, sources, source_filter=source, limit=limit)
with c.status("[bold]Searching registries..."):
results = unified_search(query, sources, source_filter=source, limit=limit)
if not results:
c.print("[dim]No skills found matching your query.[/]\n")
@@ -187,7 +188,7 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
Official skills are always shown first, regardless of source filter.
"""
from tools.skills_hub import (
GitHubAuth, create_source_router,
GitHubAuth, create_source_router, parallel_search_sources,
)
# Clamp page_size to safe range
@@ -198,27 +199,23 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
auth = GitHubAuth()
sources = create_source_router(auth)
# Collect results from all (or filtered) sources
# Use empty query to get everything; per-source limits prevent overload
# Collect results from all (or filtered) sources in parallel.
# Per-source limits are generous — parallelism + 30s timeout cap prevents hangs.
_TRUST_RANK = {"builtin": 3, "trusted": 2, "community": 1}
_PER_SOURCE_LIMIT = {"official": 100, "skills-sh": 100, "well-known": 25, "github": 100, "clawhub": 50,
"claude-marketplace": 50, "lobehub": 50}
_PER_SOURCE_LIMIT = {
"official": 200, "skills-sh": 200, "well-known": 50,
"github": 200, "clawhub": 500, "claude-marketplace": 100,
"lobehub": 500,
}
all_results: list = []
source_counts: dict = {}
for src in sources:
sid = src.source_id()
if source != "all" and sid != source and sid != "official":
# Always include official source for the "first" placement
continue
try:
limit = _PER_SOURCE_LIMIT.get(sid, 50)
results = src.search("", limit=limit)
source_counts[sid] = len(results)
all_results.extend(results)
except Exception:
continue
with c.status("[bold]Fetching skills from registries..."):
all_results, source_counts, timed_out = parallel_search_sources(
sources,
query="",
per_source_limits=_PER_SOURCE_LIMIT,
source_filter=source,
overall_timeout=30,
)
if not all_results:
c.print("[dim]No skills found in the Skills Hub.[/]\n")
@@ -252,8 +249,11 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
# Build header
source_label = f"{source}" if source != "all" else "— all sources"
loaded_label = f"{total} skills loaded"
if timed_out:
loaded_label += f", {len(timed_out)} source(s) still loading"
c.print(f"\n[bold]Skills Hub — Browse {source_label}[/]"
f" [dim]({total} skills, page {page}/{total_pages})[/]")
f" [dim]({loaded_label}, page {page}/{total_pages})[/]")
if official_count > 0 and page == 1:
c.print(f"[bright_cyan]★ {official_count} official optional skill(s) from Nous Research[/]")
c.print()
@@ -300,8 +300,11 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
parts = [f"{sid}: {ct}" for sid, ct in sorted(source_counts.items())]
c.print(f" [dim]Sources: {', '.join(parts)}[/]")
c.print("[dim]Use: hermes skills inspect <identifier> to preview, "
"hermes skills install <identifier> to install[/]\n")
if timed_out:
c.print(f" [yellow]⚡ Slow sources skipped: {', '.join(timed_out)} "
f"— run again for cached results[/]")
c.print("[dim]Tip: 'hermes skills search <query>' searches deeper across all registries[/]\n")
def do_install(identifier: str, category: str = "", force: bool = False,

View File

@@ -2675,19 +2675,89 @@ def create_source_router(auth: Optional[GitHubAuth] = None) -> List[SkillSource]
return sources
def _search_one_source(
    src: SkillSource, query: str, limit: int
) -> Tuple[str, List[SkillMeta]]:
    """Query a single source adapter; designed to run in a worker thread.

    Never raises: any adapter failure is logged at debug level and the
    source simply contributes an empty result list.

    Returns:
        ``(source_id, results)`` for the given adapter.
    """
    sid = src.source_id()
    try:
        found = src.search(query, limit=limit)
    except Exception as exc:
        logger.debug("Search failed for %s: %s", sid, exc)
        return sid, []
    return sid, found
def parallel_search_sources(
    sources: List[SkillSource],
    query: str = "",
    per_source_limits: Optional[Dict[str, int]] = None,
    source_filter: str = "all",
    overall_timeout: float = 30,
    on_source_done: Optional[Any] = None,
) -> Tuple[List[SkillMeta], Dict[str, int], List[str]]:
    """Search all sources in parallel under one overall timeout.

    Args:
        sources: Source adapters to query.
        query: Search string; empty string means "list everything".
        per_source_limits: Optional per-source-id result caps (default 50).
        source_filter: Restrict to one source id; the "official" source is
            always included so its skills keep their "first" placement.
        overall_timeout: Seconds to wait for all sources combined.
        on_source_done: Optional callback ``(source_id, count) -> None``
            invoked as each source completes — useful for progress UIs.

    Returns:
        ``(all_results, source_counts, timed_out_ids)`` — merged results
        from sources that finished in time, their per-source counts, and
        the ids of sources still running when the timeout expired.
    """
    # NOTE: before Python 3.11, concurrent.futures.TimeoutError is NOT the
    # builtin TimeoutError (it subclasses Exception, the builtin subclasses
    # OSError), so we must catch the futures variant explicitly or the
    # timeout exception escapes uncaught on 3.8-3.10.
    from concurrent.futures import (
        ThreadPoolExecutor,
        TimeoutError as FuturesTimeout,
        as_completed,
    )

    per_source_limits = per_source_limits or {}
    active: List[SkillSource] = []
    for src in sources:
        sid = src.source_id()
        # Always include the official source even when filtering, so the
        # caller can place official skills first.
        if source_filter != "all" and sid != source_filter and sid != "official":
            continue
        active.append(src)

    all_results: List[SkillMeta] = []
    source_counts: Dict[str, int] = {}
    timed_out_ids: List[str] = []
    if not active:
        return all_results, source_counts, timed_out_ids

    # Deliberately NOT a `with` block: ThreadPoolExecutor.__exit__ calls
    # shutdown(wait=True), which blocks until every slow source finishes —
    # reintroducing the very hang overall_timeout is meant to prevent.
    pool = ThreadPoolExecutor(max_workers=min(len(active), 8))
    futures: Dict[Any, str] = {}
    try:
        for src in active:
            lim = per_source_limits.get(src.source_id(), 50)
            futures[pool.submit(_search_one_source, src, query, lim)] = (
                src.source_id()
            )
        try:
            for fut in as_completed(futures, timeout=overall_timeout):
                try:
                    # fut is already done here, so result() cannot block.
                    sid, results = fut.result()
                    source_counts[sid] = len(results)
                    all_results.extend(results)
                    if on_source_done:
                        on_source_done(sid, len(results))
                except Exception:
                    # _search_one_source swallows adapter errors itself;
                    # this guards cancellation and callback failures.
                    pass
        except FuturesTimeout:
            timed_out_ids = [futures[f] for f in futures if not f.done()]
            if timed_out_ids:
                logger.debug(
                    "Skills browse timed out waiting for: %s",
                    ", ".join(timed_out_ids),
                )
    finally:
        # Drop queued work and return immediately; threads already running
        # finish in the background and warm the cache for the next run.
        for f in futures:
            f.cancel()
        pool.shutdown(wait=False)
    return all_results, source_counts, timed_out_ids
def unified_search(query: str, sources: List[SkillSource],
source_filter: str = "all", limit: int = 10) -> List[SkillMeta]:
"""Search all sources and merge results."""
all_results: List[SkillMeta] = []
for src in sources:
if source_filter != "all" and src.source_id() != source_filter:
continue
try:
results = src.search(query, limit=limit)
all_results.extend(results)
except Exception as e:
logger.debug(f"Search failed for {src.source_id()}: {e}")
"""Search all sources (in parallel) and merge results."""
all_results, _, _ = parallel_search_sources(
sources,
query=query,
source_filter=source_filter,
overall_timeout=30,
)
# Deduplicate by name, preferring higher trust levels
_TRUST_RANK = {"builtin": 2, "trusted": 1, "community": 0}