diff --git a/tests/tools/test_skills_hub_clawhub.py b/tests/tools/test_skills_hub_clawhub.py index 98611d8d1..2318ec80e 100644 --- a/tests/tools/test_skills_hub_clawhub.py +++ b/tests/tools/test_skills_hub_clawhub.py @@ -3,7 +3,7 @@ import unittest from unittest.mock import patch -from tools.skills_hub import ClawHubSource +from tools.skills_hub import ClawHubSource, SkillMeta class _MockResponse: @@ -22,21 +22,31 @@ class TestClawHubSource(unittest.TestCase): @patch("tools.skills_hub._write_index_cache") @patch("tools.skills_hub._read_index_cache", return_value=None) + @patch.object(ClawHubSource, "_load_catalog_index", return_value=[]) @patch("tools.skills_hub.httpx.get") - def test_search_uses_new_endpoint_and_parses_items(self, mock_get, _mock_read_cache, _mock_write_cache): - mock_get.return_value = _MockResponse( - status_code=200, - json_data={ - "items": [ - { - "slug": "caldav-calendar", - "displayName": "CalDAV Calendar", - "summary": "Calendar integration", - "tags": ["calendar", "productivity"], - } - ] - }, - ) + def test_search_uses_listing_endpoint_as_fallback( + self, mock_get, _mock_load_catalog, _mock_read_cache, _mock_write_cache + ): + def side_effect(url, *args, **kwargs): + if url.endswith("/skills"): + return _MockResponse( + status_code=200, + json_data={ + "items": [ + { + "slug": "caldav-calendar", + "displayName": "CalDAV Calendar", + "summary": "Calendar integration", + "tags": ["calendar", "productivity"], + } + ] + }, + ) + if url.endswith("/skills/caldav"): + return _MockResponse(status_code=404, json_data={}) + return _MockResponse(status_code=404, json_data={}) + + mock_get.side_effect = side_effect results = self.src.search("caldav", limit=5) @@ -45,11 +55,112 @@ class TestClawHubSource(unittest.TestCase): self.assertEqual(results[0].name, "CalDAV Calendar") self.assertEqual(results[0].description, "Calendar integration") - mock_get.assert_called_once() - args, kwargs = mock_get.call_args + self.assertGreaterEqual(mock_get.call_count, 2) + args, kwargs = mock_get.call_args_list[0] self.assertTrue(args[0].endswith("/skills")) self.assertEqual(kwargs["params"], {"search": "caldav", "limit": 5}) + @patch("tools.skills_hub._write_index_cache") + @patch("tools.skills_hub._read_index_cache", return_value=None) + @patch.object( + ClawHubSource, + "_load_catalog_index", + return_value=[], + ) + @patch("tools.skills_hub.httpx.get") + def test_search_falls_back_to_exact_slug_when_search_results_are_irrelevant( + self, mock_get, _mock_load_catalog, _mock_read_cache, _mock_write_cache + ): + def side_effect(url, *args, **kwargs): + if url.endswith("/skills"): + return _MockResponse( + status_code=200, + json_data={ + "items": [ + { + "slug": "apple-music-dj", + "displayName": "Apple Music DJ", + "summary": "Unrelated result", + } + ] + }, + ) + if url.endswith("/skills/self-improving-agent"): + return _MockResponse( + status_code=200, + json_data={ + "skill": { + "slug": "self-improving-agent", + "displayName": "self-improving-agent", + "summary": "Captures learnings and errors for continuous improvement.", + "tags": {"latest": "3.0.2", "automation": "3.0.2"}, + }, + "latestVersion": {"version": "3.0.2"}, + }, + ) + return _MockResponse(status_code=404, json_data={}) + + mock_get.side_effect = side_effect + + results = self.src.search("self-improving-agent", limit=5) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0].identifier, "self-improving-agent") + self.assertEqual(results[0].name, "self-improving-agent") + self.assertIn("continuous improvement", results[0].description) + + @patch("tools.skills_hub.httpx.get") + def test_search_repairs_poisoned_cache_with_exact_slug_lookup(self, mock_get): + mock_get.return_value = _MockResponse( + status_code=200, + json_data={ + "skill": { + "slug": "self-improving-agent", + "displayName": "self-improving-agent", + "summary": "Captures learnings and errors for continuous improvement.", + "tags": {"latest": "3.0.2", "automation": "3.0.2"}, + }, + "latestVersion": {"version": "3.0.2"}, + }, + ) + + poisoned = [ + SkillMeta( + name="Apple Music DJ", + description="Unrelated cached result", + source="clawhub", + identifier="apple-music-dj", + trust_level="community", + tags=[], + ) + ] + results = self.src._finalize_search_results("self-improving-agent", poisoned, 5) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0].identifier, "self-improving-agent") + mock_get.assert_called_once() + self.assertTrue(mock_get.call_args.args[0].endswith("/skills/self-improving-agent")) + + @patch.object( + ClawHubSource, + "_exact_slug_meta", + return_value=SkillMeta( + name="self-improving-agent", + description="Captures learnings and errors for continuous improvement.", + source="clawhub", + identifier="self-improving-agent", + trust_level="community", + tags=["automation"], + ), + ) + def test_search_matches_space_separated_query_to_hyphenated_slug( + self, _mock_exact_slug + ): + results = self.src.search("self improving", limit=5) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0].identifier, "self-improving-agent") + @patch("tools.skills_hub.httpx.get") def test_inspect_maps_display_name_and_summary(self, mock_get): mock_get.return_value = _MockResponse( @@ -69,6 +180,29 @@ class TestClawHubSource(unittest.TestCase): self.assertEqual(meta.description, "Calendar integration") self.assertEqual(meta.identifier, "caldav-calendar") + @patch("tools.skills_hub.httpx.get") + def test_inspect_handles_nested_skill_payload(self, mock_get): + mock_get.return_value = _MockResponse( + status_code=200, + json_data={ + "skill": { + "slug": "self-improving-agent", + "displayName": "self-improving-agent", + "summary": "Captures learnings and errors for continuous improvement.", + "tags": {"latest": "3.0.2", "automation": "3.0.2"}, + }, + "latestVersion": {"version": "3.0.2"}, + }, + ) + + meta = self.src.inspect("self-improving-agent") + + self.assertIsNotNone(meta) + self.assertEqual(meta.name, "self-improving-agent") + self.assertIn("continuous improvement", meta.description) + self.assertEqual(meta.identifier, "self-improving-agent") + self.assertEqual(meta.tags, ["automation"]) + @patch("tools.skills_hub.httpx.get") def test_fetch_resolves_latest_version_and_downloads_raw_files(self, mock_get): def side_effect(url, *args, **kwargs): diff --git a/tools/skills_hub.py b/tools/skills_hub.py index 94845fe92..52472656c 100644 --- a/tools/skills_hub.py +++ b/tools/skills_hub.py @@ -1156,11 +1156,176 @@ class ClawHubSource(SkillSource): def trust_level_for(self, identifier: str) -> str: return "community" + @staticmethod + def _normalize_tags(tags: Any) -> List[str]: + if isinstance(tags, list): + return [str(t) for t in tags] + if isinstance(tags, dict): + return [str(k) for k in tags.keys() if str(k) != "latest"] + return [] + + @staticmethod + def _coerce_skill_payload(data: Any) -> Optional[Dict[str, Any]]: + if not isinstance(data, dict): + return None + nested = data.get("skill") + if isinstance(nested, dict): + merged = dict(nested) + latest_version = data.get("latestVersion") + if latest_version is not None and "latestVersion" not in merged: + merged["latestVersion"] = latest_version + return merged + return data + + @staticmethod + def _query_terms(query: str) -> List[str]: + return [term for term in re.split(r"[^a-z0-9]+", query.lower()) if term] + + @classmethod + def _search_score(cls, query: str, meta: SkillMeta) -> int: + query_norm = query.strip().lower() + if not query_norm: + return 1 + + identifier = (meta.identifier or "").lower() + name = (meta.name or "").lower() + description = (meta.description or "").lower() + normalized_identifier = " ".join(cls._query_terms(identifier)) + normalized_name = " ".join(cls._query_terms(name)) + query_terms = cls._query_terms(query_norm) + identifier_terms = cls._query_terms(identifier) + name_terms = cls._query_terms(name) + score = 0 + + if query_norm == identifier: + score += 140 + if query_norm == name: + score += 130 + if normalized_identifier == query_norm: + score += 125 + if normalized_name == query_norm: + score += 120 + if normalized_identifier.startswith(query_norm): + score += 95 + if normalized_name.startswith(query_norm): + score += 90 + if query_terms and identifier_terms[: len(query_terms)] == query_terms: + score += 70 + if query_terms and name_terms[: len(query_terms)] == query_terms: + score += 65 + if query_norm in identifier: + score += 40 + if query_norm in name: + score += 35 + if query_norm in description: + score += 10 + + for term in query_terms: + if term in identifier_terms: + score += 15 + if term in name_terms: + score += 12 + if term in description: + score += 3 + + return score + + @staticmethod + def _dedupe_results(results: List[SkillMeta]) -> List[SkillMeta]: + seen: set[str] = set() + deduped: List[SkillMeta] = [] + for result in results: + key = (result.identifier or result.name).lower() + if key in seen: + continue + seen.add(key) + deduped.append(result) + return deduped + + def _exact_slug_meta(self, query: str) -> Optional[SkillMeta]: + slug = query.strip().split("/")[-1] + query_terms = self._query_terms(query) + candidates: List[str] = [] + + if slug and re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9._-]*", slug): + candidates.append(slug) + + if query_terms: + base_slug = "-".join(query_terms) + if len(query_terms) >= 2: + candidates.extend([ + f"{base_slug}-agent", + f"{base_slug}-skill", + f"{base_slug}-tool", + f"{base_slug}-assistant", + f"{base_slug}-playbook", + base_slug, + ]) + else: + candidates.append(base_slug) + + seen: set[str] = set() + for candidate in candidates: + if candidate in seen: + continue + seen.add(candidate) + meta = self.inspect(candidate) + if meta: + return meta + + return None + + def _finalize_search_results(self, query: str, results: List[SkillMeta], limit: int) -> List[SkillMeta]: + query_norm = query.strip() + if not query_norm: + return self._dedupe_results(results)[:limit] + + filtered = [meta for meta in results if self._search_score(query_norm, meta) > 0] + filtered.sort( + key=lambda meta: ( + -self._search_score(query_norm, meta), + meta.name.lower(), + meta.identifier.lower(), + ) + ) + filtered = self._dedupe_results(filtered) + + exact = self._exact_slug_meta(query_norm) + if exact: + filtered = [meta for meta in filtered if self._search_score(query_norm, meta) >= 20] + filtered = self._dedupe_results([exact] + filtered) + + if filtered: + return filtered[:limit] + + if re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9._/-]*", query_norm): + return [] + + return self._dedupe_results(results)[:limit] + def search(self, query: str, limit: int = 10) -> List[SkillMeta]: - cache_key = f"clawhub_search_{hashlib.md5(query.encode()).hexdigest()}" + query = query.strip() + + if query: + query_terms = self._query_terms(query) + if len(query_terms) >= 2: + direct = self._exact_slug_meta(query) + if direct: + return [direct] + + results = self._search_catalog(query, limit=limit) + if results: + return results + + # Empty query or catalog fallback failure: use the lightweight listing API. + cache_key = f"clawhub_search_listing_v1_{hashlib.md5(query.encode()).hexdigest()}_{limit}" cached = _read_index_cache(cache_key) if cached is not None: - return [SkillMeta(**s) for s in cached][:limit] + return self._finalize_search_results( + query, + [SkillMeta(**s) for s in cached], + limit, + ) try: resp = httpx.get( @@ -1185,20 +1350,19 @@ class ClawHubSource(SkillSource): continue display_name = item.get("displayName") or item.get("name") or slug summary = item.get("summary") or item.get("description") or "" - tags = item.get("tags", []) - if not isinstance(tags, list): - tags = [] + tags = self._normalize_tags(item.get("tags", [])) results.append(SkillMeta( name=display_name, description=summary, source="clawhub", identifier=slug, trust_level="community", - tags=[str(t) for t in tags], + tags=tags, )) - _write_index_cache(cache_key, [_skill_meta_to_dict(s) for s in results]) - return results + final_results = self._finalize_search_results(query, results, limit) + _write_index_cache(cache_key, [_skill_meta_to_dict(s) for s in final_results]) + return final_results def fetch(self, identifier: str) -> Optional[SkillBundle]: slug = identifier.split("/")[-1] @@ -1244,13 +1408,11 @@ class ClawHubSource(SkillSource): def inspect(self, identifier: str) -> Optional[SkillMeta]: slug = identifier.split("/")[-1] - data = self._get_json(f"{self.BASE_URL}/skills/{slug}") + data = self._coerce_skill_payload(self._get_json(f"{self.BASE_URL}/skills/{slug}")) if not isinstance(data, dict): return None - tags = data.get("tags", []) - if not isinstance(tags, list): - tags = [] + tags = self._normalize_tags(data.get("tags", [])) return SkillMeta( name=data.get("displayName") or data.get("name") or data.get("slug") or slug, @@ -1258,9 +1420,75 @@ class ClawHubSource(SkillSource): source="clawhub", identifier=data.get("slug") or slug, trust_level="community", - tags=[str(t) for t in tags], + tags=tags, ) + def _search_catalog(self, query: str, limit: int = 10) -> List[SkillMeta]: + cache_key = f"clawhub_search_catalog_v1_{hashlib.md5(f'{query}|{limit}'.encode()).hexdigest()}" + cached = _read_index_cache(cache_key) + if cached is not None: + return [SkillMeta(**s) for s in cached][:limit] + + catalog = self._load_catalog_index() + if not catalog: + return [] + + results = self._finalize_search_results(query, catalog, limit) + _write_index_cache(cache_key, [_skill_meta_to_dict(s) for s in results]) + return results + + def _load_catalog_index(self) -> List[SkillMeta]: + cache_key = "clawhub_catalog_v1" + cached = _read_index_cache(cache_key) + if cached is not None: + return [SkillMeta(**s) for s in cached] + + cursor: Optional[str] = None + results: List[SkillMeta] = [] + seen: set[str] = set() + max_pages = 50 + + for _ in range(max_pages): + params: Dict[str, Any] = {"limit": 200} + if cursor: + params["cursor"] = cursor + + try: + resp = httpx.get(f"{self.BASE_URL}/skills", params=params, timeout=30) + if resp.status_code != 200: + break + data = resp.json() + except (httpx.HTTPError, json.JSONDecodeError): + break + + items = data.get("items", []) if isinstance(data, dict) else [] + if not isinstance(items, list) or not items: + break + + for item in items: + slug = item.get("slug") + if not isinstance(slug, str) or not slug or slug in seen: + continue + seen.add(slug) + display_name = item.get("displayName") or item.get("name") or slug + summary = item.get("summary") or item.get("description") or "" + tags = self._normalize_tags(item.get("tags", [])) + results.append(SkillMeta( + name=display_name, + description=summary, + source="clawhub", + identifier=slug, + trust_level="community", + tags=tags, + )) + + cursor = data.get("nextCursor") if isinstance(data, dict) else None + if not isinstance(cursor, str) or not cursor: + break + + _write_index_cache(cache_key, [_skill_meta_to_dict(s) for s in results]) + return results + def _get_json(self, url: str, timeout: int = 20) -> Optional[Any]: try: resp = httpx.get(url, timeout=timeout)