diff --git a/tests/tools/test_skills_hub.py b/tests/tools/test_skills_hub.py index 778a77ba..ddd1585c 100644 --- a/tests/tools/test_skills_hub.py +++ b/tests/tools/test_skills_hub.py @@ -4,6 +4,8 @@ import json from pathlib import Path from unittest.mock import patch, MagicMock +import httpx + from tools.skills_hub import ( GitHubAuth, GitHubSource, @@ -1039,3 +1041,151 @@ class TestQuarantineBundleBinaryAssets: assert (q_path / "SKILL.md").read_text(encoding="utf-8").startswith("---") assert (q_path / "assets" / "neutts-cli" / "samples" / "jo.wav").read_bytes() == b"RIFF\x00\x01fakewav" + + +# --------------------------------------------------------------------------- +# GitHubSource._download_directory — tree API + fallback (#2940) +# --------------------------------------------------------------------------- + + +class TestDownloadDirectoryViaTree: + """Tests for the Git Trees API path in _download_directory.""" + + def _source(self): + auth = MagicMock(spec=GitHubAuth) + auth.get_headers.return_value = {} + return GitHubSource(auth=auth) + + @patch.object(GitHubSource, "_fetch_file_content") + @patch("tools.skills_hub.httpx.get") + def test_tree_api_downloads_subdirectories(self, mock_get, mock_fetch): + """Tree API returns files from nested subdirectories.""" + repo_resp = MagicMock(status_code=200, json=lambda: {"default_branch": "main"}) + tree_resp = MagicMock(status_code=200, json=lambda: { + "truncated": False, + "tree": [ + {"type": "blob", "path": "skills/my-skill/SKILL.md"}, + {"type": "blob", "path": "skills/my-skill/scripts/run.py"}, + {"type": "blob", "path": "skills/my-skill/references/api.md"}, + {"type": "tree", "path": "skills/my-skill/scripts"}, + {"type": "blob", "path": "other/file.txt"}, + ], + }) + mock_get.side_effect = [repo_resp, tree_resp] + mock_fetch.side_effect = lambda repo, path: f"content-of-{path}" + + src = self._source() + files = src._download_directory("owner/repo", "skills/my-skill") + + assert "SKILL.md" in files + assert "scripts/run.py" in files + assert "references/api.md" in files + assert "other/file.txt" not in files # outside target path + assert len(files) == 3 + + @patch.object(GitHubSource, "_download_directory_recursive", return_value={"SKILL.md": "# ok"}) + @patch("tools.skills_hub.httpx.get") + def test_falls_back_on_truncated_tree(self, mock_get, mock_fallback): + """When tree is truncated, fall back to recursive Contents API.""" + repo_resp = MagicMock(status_code=200, json=lambda: {"default_branch": "main"}) + tree_resp = MagicMock(status_code=200, json=lambda: {"truncated": True, "tree": []}) + mock_get.side_effect = [repo_resp, tree_resp] + + src = self._source() + files = src._download_directory("owner/repo", "skills/my-skill") + + assert files == {"SKILL.md": "# ok"} + mock_fallback.assert_called_once_with("owner/repo", "skills/my-skill") + + @patch.object(GitHubSource, "_download_directory_recursive", return_value={"SKILL.md": "# ok"}) + @patch("tools.skills_hub.httpx.get") + def test_falls_back_on_repo_api_failure(self, mock_get, mock_fallback): + """When the repo endpoint returns non-200, fall back to Contents API.""" + mock_get.return_value = MagicMock(status_code=404) + + src = self._source() + files = src._download_directory("owner/repo", "skills/my-skill") + + assert files == {"SKILL.md": "# ok"} + mock_fallback.assert_called_once() + + @patch.object(GitHubSource, "_fetch_file_content") + @patch("tools.skills_hub.httpx.get") + def test_tree_api_skips_failed_file_fetches(self, mock_get, mock_fetch): + """Files that fail to fetch are skipped, not fatal.""" + repo_resp = MagicMock(status_code=200, json=lambda: {"default_branch": "main"}) + tree_resp = MagicMock(status_code=200, json=lambda: { + "truncated": False, + "tree": [ + {"type": "blob", "path": "skills/my-skill/SKILL.md"}, + {"type": "blob", "path": "skills/my-skill/scripts/run.py"}, + ], + }) + mock_get.side_effect = [repo_resp, tree_resp] + mock_fetch.side_effect = lambda repo, path: ( + "# Skill" if path.endswith("SKILL.md") else None + ) + + src = self._source() + files = src._download_directory("owner/repo", "skills/my-skill") + + assert "SKILL.md" in files + assert "scripts/run.py" not in files + + @patch.object(GitHubSource, "_download_directory_recursive", return_value={}) + @patch("tools.skills_hub.httpx.get") + def test_falls_back_on_network_error(self, mock_get, mock_fallback): + """Network errors in tree API trigger fallback.""" + mock_get.side_effect = httpx.ConnectError("connection refused") + + src = self._source() + src._download_directory("owner/repo", "skills/my-skill") + + mock_fallback.assert_called_once() + + +class TestDownloadDirectoryRecursive: + """Tests for the Contents API fallback path.""" + + def _source(self): + auth = MagicMock(spec=GitHubAuth) + auth.get_headers.return_value = {} + return GitHubSource(auth=auth) + + @patch.object(GitHubSource, "_fetch_file_content") + @patch("tools.skills_hub.httpx.get") + def test_recursive_downloads_subdirectories(self, mock_get, mock_fetch): + """Contents API recursion includes subdirectories.""" + root_resp = MagicMock(status_code=200, json=lambda: [ + {"name": "SKILL.md", "type": "file", "path": "skill/SKILL.md"}, + {"name": "scripts", "type": "dir", "path": "skill/scripts"}, + ]) + sub_resp = MagicMock(status_code=200, json=lambda: [ + {"name": "run.py", "type": "file", "path": "skill/scripts/run.py"}, + ]) + mock_get.side_effect = [root_resp, sub_resp] + mock_fetch.side_effect = lambda repo, path: f"content-of-{path}" + + src = self._source() + files = src._download_directory_recursive("owner/repo", "skill") + + assert "SKILL.md" in files + assert "scripts/run.py" in files + + @patch.object(GitHubSource, "_fetch_file_content") + @patch("tools.skills_hub.httpx.get") + def test_recursive_handles_subdir_failure(self, mock_get, mock_fetch): + """Subdirectory 403/rate-limit returns empty but doesn't crash.""" + root_resp = MagicMock(status_code=200, json=lambda: [ + {"name": "SKILL.md", "type": "file", "path": "skill/SKILL.md"}, + {"name": "scripts", "type": "dir", "path": "skill/scripts"}, + ]) + sub_resp = MagicMock(status_code=403) + mock_get.side_effect = [root_resp, sub_resp] + mock_fetch.return_value = "content" + + src = self._source() + files = src._download_directory_recursive("owner/repo", "skill") + + assert "SKILL.md" in files + assert "scripts/run.py" not in files # lost due to rate limit diff --git a/tools/skills_hub.py b/tools/skills_hub.py index 8ae65ed7..782b0ce1 100644 --- a/tools/skills_hub.py +++ b/tools/skills_hub.py @@ -404,11 +404,75 @@ class GitHubSource(SkillSource): return skills def _download_directory(self, repo: str, path: str) -> Dict[str, str]: - """Recursively download all text files from a GitHub directory.""" + """Recursively download all text files from a GitHub directory. + + Uses the Git Trees API first (single call for the entire tree) to + avoid per-directory rate limiting that causes silent subdirectory + loss. Falls back to the recursive Contents API when the tree + endpoint is unavailable or the response is truncated. + """ + files = self._download_directory_via_tree(repo, path) + if files is not None: + return files + logger.debug("Tree API unavailable for %s/%s, falling back to Contents API", repo, path) + return self._download_directory_recursive(repo, path) + + def _download_directory_via_tree(self, repo: str, path: str) -> Optional[Dict[str, str]]: + """Download an entire directory using the Git Trees API (single request).""" + path = path.rstrip("/") + headers = self.auth.get_headers() + + # Resolve the default branch via the repo endpoint + try: + repo_url = f"https://api.github.com/repos/{repo}" + resp = httpx.get(repo_url, headers=headers, timeout=15, follow_redirects=True) + if resp.status_code != 200: + return None + default_branch = resp.json().get("default_branch", "main") + except (httpx.HTTPError, ValueError): + return None + + # Fetch the full recursive tree (branch name works as tree-ish) + try: + tree_url = f"https://api.github.com/repos/{repo}/git/trees/{default_branch}" + resp = httpx.get( + tree_url, params={"recursive": "1"}, + headers=headers, timeout=30, follow_redirects=True, + ) + if resp.status_code != 200: + return None + tree_data = resp.json() + if tree_data.get("truncated"): + logger.debug("Git tree truncated for %s, falling back to Contents API", repo) + return None + except (httpx.HTTPError, ValueError): + return None + + # Filter to blobs under our target path and fetch content + prefix = f"{path}/" + files: Dict[str, str] = {} + for item in tree_data.get("tree", []): + if item.get("type") != "blob": + continue + item_path = item.get("path", "") + if not item_path.startswith(prefix): + continue + rel_path = item_path[len(prefix):] + content = self._fetch_file_content(repo, item_path) + if content is not None: + files[rel_path] = content + else: + logger.debug("Skipped file (fetch failed): %s/%s", repo, item_path) + + return files if files else None + + def _download_directory_recursive(self, repo: str, path: str) -> Dict[str, str]: + """Recursively download via Contents API (fallback).""" url = f"https://api.github.com/repos/{repo}/contents/{path.rstrip('/')}" try: resp = httpx.get(url, headers=self.auth.get_headers(), timeout=15, follow_redirects=True) if resp.status_code != 200: + logger.debug("Contents API returned %d for %s/%s", resp.status_code, repo, path) return {} except httpx.HTTPError: return {} @@ -428,7 +492,9 @@ class GitHubSource(SkillSource): rel_path = name files[rel_path] = content elif entry_type == "dir": - sub_files = self._download_directory(repo, entry.get("path", "")) + sub_files = self._download_directory_recursive(repo, entry.get("path", "")) + if not sub_files: + logger.debug("Empty or failed subdirectory: %s/%s", repo, entry.get("path", "")) for sub_name, sub_content in sub_files.items(): files[f"{name}/{sub_name}"] = sub_content