fix: ClawHub skill install — use /download ZIP endpoint (#1060)

The ClawHub API v1 version endpoint only returns file metadata
(path, size, sha256, contentType) without inline content or download
URLs. Our code was looking for inline content in the metadata, which
never existed, causing all ClawHub installs to fail with:
'no inline/raw file content was available'

Fix: Use the /api/v1/download endpoint (same as the official clawhub
CLI) to download skills as ZIP bundles and extract files in-memory.

Changes:
- Add _download_zip() method that downloads and extracts ZIP bundles
- Retry on 429 rate limiting with Retry-After header support
- Path sanitization and binary file filtering for security
- Keep _extract_files() as a fallback for inline/raw content
- Also fix nested file lookup (version_data.version.files)
This commit is contained in:
Teknium
2026-03-12 08:26:24 -07:00
committed by GitHub
parent 42cf66ae39
commit 5c54128475

View File

@@ -572,14 +572,23 @@ class ClawHubSource(SkillSource):
logger.warning("ClawHub fetch failed for %s: could not resolve latest version", slug)
return None
version_data = self._get_json(f"{self.BASE_URL}/skills/{slug}/versions/{latest_version}")
if not isinstance(version_data, dict):
return None
# Primary method: download the skill as a ZIP bundle from /download
files = self._download_zip(slug, latest_version)
# Fallback: try the version metadata endpoint for inline/raw content
if "SKILL.md" not in files:
version_data = self._get_json(f"{self.BASE_URL}/skills/{slug}/versions/{latest_version}")
if isinstance(version_data, dict):
# Files may be nested under version_data["version"]["files"]
files = self._extract_files(version_data) or files
if "SKILL.md" not in files:
nested = version_data.get("version", {})
if isinstance(nested, dict):
files = self._extract_files(nested) or files
files = self._extract_files(version_data)
if "SKILL.md" not in files:
logger.warning(
"ClawHub fetch for %s resolved version %s but no inline/raw file content was available",
"ClawHub fetch for %s resolved version %s but could not retrieve file content",
slug,
latest_version,
)
@@ -674,6 +683,65 @@ class ClawHubSource(SkillSource):
return files
def _download_zip(self, slug: str, version: str) -> Dict[str, str]:
    """Download a skill as a ZIP bundle from the /download endpoint and extract its text files.

    The bundle is fetched with ``httpx.get`` and unpacked entirely in memory;
    nothing is written to disk by this method.

    Args:
        slug: Skill identifier, sent as the ``slug`` query parameter.
        version: Version string, sent as the ``version`` query parameter.

    Returns:
        A mapping of archive-relative path -> UTF-8 decoded file content.
        The mapping is empty (or partial) when the download fails, the
        archive is invalid, or retries are exhausted; this method does not
        raise for those conditions, it only logs them.
    """
    import io
    import zipfile

    files: Dict[str, str] = {}
    max_retries = 3
    default_wait = 5  # seconds, used when Retry-After is missing or unparseable
    max_wait = 15  # seconds, upper bound on a single back-off sleep
    max_file_size = 500_000  # bytes; larger members are treated as binaries and skipped

    for attempt in range(max_retries):
        # Keep the try body minimal: only the request itself raises httpx.HTTPError.
        try:
            resp = httpx.get(
                f"{self.BASE_URL}/download",
                params={"slug": slug, "version": version},
                timeout=30,
                follow_redirects=True,
            )
        except httpx.HTTPError as exc:
            logger.debug("ClawHub ZIP download failed for %s v%s: %s", slug, version, exc)
            return files

        if resp.status_code == 429:
            # Nothing left to retry: do not sleep just to give up afterwards.
            if attempt + 1 >= max_retries:
                break
            # Retry-After may be delta-seconds or an HTTP-date; anything that is
            # not a plain integer falls back to the default wait instead of
            # raising ValueError out of this method.
            try:
                retry_after = int(resp.headers.get("retry-after", default_wait))
            except (TypeError, ValueError):
                retry_after = default_wait
            # Clamp to [0, max_wait]; time.sleep() rejects negative values.
            retry_after = max(0, min(retry_after, max_wait))
            logger.debug(
                "ClawHub download rate-limited for %s, retrying in %ds (attempt %d/%d)",
                slug, retry_after, attempt + 1, max_retries,
            )
            time.sleep(retry_after)
            continue

        if resp.status_code != 200:
            logger.debug("ClawHub ZIP download for %s v%s returned %s", slug, version, resp.status_code)
            return files

        try:
            with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
                for info in zf.infolist():
                    if info.is_dir():
                        continue
                    # Sanitize path: drop leading slashes, and reject empty
                    # names and anything containing ".." (path traversal).
                    name = info.filename.lstrip("/")
                    if not name or ".." in name:
                        continue
                    # Only extract text-sized files (skip large binaries).
                    if info.file_size > max_file_size:
                        logger.debug("Skipping large file in ZIP: %s (%d bytes)", name, info.file_size)
                        continue
                    try:
                        raw = zf.read(info.filename)
                        files[name] = raw.decode("utf-8")
                    except (UnicodeDecodeError, KeyError):
                        logger.debug("Skipping non-text file in ZIP: %s", name)
                        continue
                    except (RuntimeError, NotImplementedError) as exc:
                        # zipfile raises RuntimeError for encrypted members and
                        # NotImplementedError for unsupported compression methods;
                        # skip that member rather than abort the whole bundle.
                        logger.debug("Skipping unreadable file in ZIP: %s (%s)", name, exc)
                        continue
        except zipfile.BadZipFile:
            logger.warning("ClawHub returned invalid ZIP for %s v%s", slug, version)
        return files

    logger.debug("ClawHub ZIP download exhausted retries for %s v%s", slug, version)
    return files
def _fetch_text(self, url: str) -> Optional[str]:
try:
resp = httpx.get(url, timeout=20)