fix: ClawHub skill install — use /download ZIP endpoint (#1060)

The ClawHub API v1 version endpoint only returns file metadata (path, size, sha256, contentType), without inline content or download URLs. Our code was looking for inline content in that metadata, which never existed, causing all ClawHub installs to fail with: 'no inline/raw file content was available'. Fix: use the /api/v1/download endpoint (same as the official clawhub CLI) to download skills as ZIP bundles and extract files in-memory. Changes: - Add _download_zip() method that downloads and extracts ZIP bundles - Retry on 429 rate limiting with Retry-After header support - Path sanitization and filtering of oversized/non-text files for security - Keep _extract_files() as a fallback for inline/raw content - Also fix nested file lookup (version_data.version.files)
2026-03-12 08:26:24 -07:00
parent 42cf66ae39
commit 5c54128475
1 changed files with 73 additions and 5 deletions
--- a/tools/skills_hub.py
+++ b/tools/skills_hub.py
@@ -572,14 +572,23 @@ class ClawHubSource(SkillSource):
            logger.warning("ClawHub fetch failed for %s: could not resolve latest version", slug)
            return None

-        version_data = self._get_json(f"{self.BASE_URL}/skills/{slug}/versions/{latest_version}")
-        if not isinstance(version_data, dict):
-            return None
+        # Primary method: download the skill as a ZIP bundle from /download
+        files = self._download_zip(slug, latest_version)
+
+        # Fallback: try the version metadata endpoint for inline/raw content
+        if "SKILL.md" not in files:
+            version_data = self._get_json(f"{self.BASE_URL}/skills/{slug}/versions/{latest_version}")
+            if isinstance(version_data, dict):
+                # Files may be nested under version_data["version"]["files"]
+                files = self._extract_files(version_data) or files
+                if "SKILL.md" not in files:
+                    nested = version_data.get("version", {})
+                    if isinstance(nested, dict):
+                        files = self._extract_files(nested) or files

-        files = self._extract_files(version_data)
        if "SKILL.md" not in files:
            logger.warning(
-                "ClawHub fetch for %s resolved version %s but no inline/raw file content was available",
+                "ClawHub fetch for %s resolved version %s but could not retrieve file content",
                slug,
                latest_version,
            )
@@ -674,6 +683,65 @@ class ClawHubSource(SkillSource):

        return files

+    def _download_zip(self, slug: str, version: str) -> Dict[str, str]:
+        """Download a skill's ZIP bundle from ``/download`` and return its text files.
+
+        Returns ``{path: text}``; members over 500,000 bytes, non-UTF-8, encrypted or using
+        unsupported compression are skipped, and failures are logged (result may be empty).
+        HTTP 429 is retried for up to 3 attempts, waiting ``Retry-After`` (0-15s, default 5s).
+        """
+        import io
+        import zipfile
+
+        files: Dict[str, str] = {}
+        max_retries = 3
+        for attempt in range(max_retries):
+            try:
+                resp = httpx.get(
+                    f"{self.BASE_URL}/download",
+                    params={"slug": slug, "version": version},
+                    timeout=30,
+                    follow_redirects=True,
+                )
+                if resp.status_code == 429:
+                    try:  # Retry-After may be an HTTP-date, which int() cannot parse
+                        retry_after = int(resp.headers.get("retry-after", "5"))
+                    except ValueError:
+                        retry_after = 5
+                    retry_after = max(0, min(retry_after, 15))  # cap wait; sleep() rejects negatives
+                    logger.debug(
+                        "ClawHub download rate-limited for %s, retrying in %ds (attempt %d/%d)",
+                        slug, retry_after, attempt + 1, max_retries,
+                    )
+                    time.sleep(retry_after)
+                    continue
+                if resp.status_code != 200:
+                    logger.debug("ClawHub ZIP download for %s v%s returned %s", slug, version, resp.status_code)
+                    return files
+                with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
+                    for info in zf.infolist():
+                        # Sanitize path — strip leading slashes; drop directories and any ".."
+                        name = info.filename.lstrip("/")
+                        if info.is_dir() or ".." in name:
+                            continue
+                        # Only extract text-sized files (skip large binaries)
+                        if info.file_size > 500_000:
+                            logger.debug("Skipping large file in ZIP: %s (%d bytes)", name, info.file_size)
+                            continue
+                        try:  # read by ZipInfo so the size check above applies to this exact entry
+                            files[name] = zf.read(info).decode("utf-8")
+                        except (UnicodeDecodeError, RuntimeError, NotImplementedError):
+                            logger.debug("Skipping non-text file in ZIP: %s", name)
+                return files
+            except zipfile.BadZipFile:
+                logger.warning("ClawHub returned invalid ZIP for %s v%s", slug, version)
+                return files
+            except httpx.HTTPError as exc:
+                logger.debug("ClawHub ZIP download failed for %s v%s: %s", slug, version, exc)
+                return files
+        logger.debug("ClawHub ZIP download exhausted retries for %s v%s", slug, version)
+        return files
+
    def _fetch_text(self, url: str) -> Optional[str]:
        try:
            resp = httpx.get(url, timeout=20)