#!/usr/bin/env python3
"""
Skills Tool Module

This module provides tools for listing and viewing skill documents.
Skills are organized as directories containing a SKILL.md file (the main
instructions) and optional supporting files like references, templates, and
examples.

Inspired by Anthropic's Claude Skills system with progressive disclosure
architecture:
- Metadata (name ≤64 chars, description ≤1024 chars) - shown in skills_list
- Full Instructions - loaded via skill_view when needed
- Linked Files (references, templates) - loaded on demand

Directory Structure:
    skills/
    ├── my-skill/
    │   ├── SKILL.md           # Main instructions (required)
    │   ├── references/        # Supporting documentation
    │   │   ├── api.md
    │   │   └── examples.md
    │   └── templates/         # Templates for output
    │       └── template.md
    └── category/              # Category folder for organization
        └── another-skill/
            └── SKILL.md

SKILL.md Format (YAML Frontmatter):
    ---
    name: skill-name              # Required, max 64 chars
    description: Brief description # Required, max 1024 chars
    tags: [fine-tuning, llm]      # Optional, for filtering
    related_skills: [peft, lora]  # Optional, for composability
    version: 1.0.0                # Optional, for tracking
    ---

    # Skill Title

    Full instructions and content here...

Available tools:
- skills_categories: List categories with descriptions (progressive disclosure tier 0)
- skills_list: List skills with metadata (progressive disclosure tier 1)
- skill_view: Load full skill content (progressive disclosure tier 2-3)

Usage:
    from tools.skills_tool import skills_list, skill_view, check_skills_requirements

    # List all skills (returns metadata only - token efficient)
    result = skills_list()

    # View a skill's main content (loads full instructions)
    content = skill_view("axolotl")

    # View a reference file within a skill (loads linked file)
    content = skill_view("axolotl", "references/dataset-formats.md")
"""

import json
import os
import re
from pathlib import Path
from typing import Dict, Any, List, Optional, Tuple


# Default skills directory (relative to repo root)
SKILLS_DIR = Path(__file__).parent.parent / "skills"

# Anthropic-recommended limits for progressive disclosure efficiency
MAX_NAME_LENGTH = 64
MAX_DESCRIPTION_LENGTH = 1024

# Directory names (below SKILLS_DIR) whose contents are never skills
_IGNORED_DIR_NAMES = frozenset({".git", ".github"})

# Markdown files that describe the skills tree itself rather than a skill.
# DESCRIPTION.md is the per-category description read by
# _load_category_description(), so it must not be listed as a legacy skill.
_NON_SKILL_FILENAMES = frozenset(
    {"README.md", "CONTRIBUTING.md", "CLAUDE.md", "LICENSE", "DESCRIPTION.md"}
)

# File suffixes reported under "other" / "templates" when listing skill files
_TEXT_FILE_SUFFIXES = ('.md', '.py', '.yaml', '.yml', '.json', '.tex', '.sh')

# Glob patterns for executable helpers under scripts/
_SCRIPT_PATTERNS = ('*.py', '*.sh', '*.bash', '*.js', '*.ts', '*.rb')

# Closing frontmatter delimiter: a line holding only '---', followed either by
# a newline or by the end of the file (a file may end right after the metadata)
_FRONTMATTER_END_RE = re.compile(r'\n---\s*(?:\n|$)')


def check_skills_requirements() -> bool:
    """
    Check if skills tool requirements are met.

    Returns:
        bool: True if the skills directory exists, False otherwise
    """
    return SKILLS_DIR.is_dir()


def _is_within(path: Path, root: Path) -> bool:
    """
    Check lexically whether ``path`` is ``root`` or lies below it.

    ``..`` components are normalized without touching the filesystem, so
    symlinks that live inside ``root`` are still accepted.

    Args:
        path: Candidate path
        root: Directory the candidate must stay inside

    Returns:
        True if the normalized path is inside (or equal to) the root
    """
    root_abs = os.path.abspath(root)
    path_abs = os.path.abspath(path)
    try:
        return os.path.commonpath([root_abs, path_abs]) == root_abs
    except ValueError:
        # Raised e.g. for paths on different drives on Windows
        return False


def _is_in_ignored_dir(path: Path) -> bool:
    """
    Check whether ``path`` lies inside a .git/.github directory.

    Only directory components below SKILLS_DIR are inspected, so the location
    of the repository itself cannot cause every file to be ignored.

    Args:
        path: Path to a file found while scanning SKILLS_DIR

    Returns:
        True if any parent directory component is an ignored directory name
    """
    try:
        parts = path.relative_to(SKILLS_DIR).parts
    except ValueError:
        parts = path.parts
    # parts[-1] is the file name itself; only directories are checked
    return any(part in _IGNORED_DIR_NAMES for part in parts[:-1])


def _parse_frontmatter(content: str) -> Tuple[Dict[str, str], str]:
    """
    Parse YAML frontmatter from markdown content.

    Only flat ``key: value`` lines are understood; values are kept as raw
    strings (lists such as ``[a, b]`` are decoded later by _parse_tags).

    Args:
        content: Full markdown file content

    Returns:
        Tuple of (frontmatter dict, remaining content). When no complete
        frontmatter block is present the dict is empty and the content is
        returned unchanged.
    """
    frontmatter: Dict[str, str] = {}
    body = content

    # Frontmatter must start on the very first line with '---'
    if content.startswith("---"):
        rest = content[3:]
        end_match = _FRONTMATTER_END_RE.search(rest)
        if end_match:
            yaml_content = rest[:end_match.start()]
            body = rest[end_match.end():]

            # Simple YAML parsing for key: value pairs
            for line in yaml_content.strip().split('\n'):
                if ':' in line:
                    key, value = line.split(':', 1)
                    frontmatter[key.strip()] = value.strip()

    return frontmatter, body


def _extract_description(frontmatter: Dict[str, str], body: str) -> str:
    """
    Pick a description for a document and cap it at MAX_DESCRIPTION_LENGTH.

    Args:
        frontmatter: Parsed frontmatter of the document
        body: Markdown content following the frontmatter

    Returns:
        The frontmatter ``description`` if set, otherwise the first non-empty
        line of the body that is not a markdown header; '' if neither exists.
        Over-long descriptions are truncated and end with '...'.
    """
    description = frontmatter.get('description', '')
    if not description:
        for line in body.strip().split('\n'):
            line = line.strip()
            if line and not line.startswith('#'):
                description = line
                break

    if len(description) > MAX_DESCRIPTION_LENGTH:
        description = description[:MAX_DESCRIPTION_LENGTH - 3] + "..."
    return description


def _get_category_from_path(skill_path: Path) -> Optional[str]:
    """
    Extract category from skill path based on directory structure.

    For paths like: skills/03-fine-tuning/axolotl/SKILL.md
    Returns: "03-fine-tuning"

    Args:
        skill_path: Path to SKILL.md file

    Returns:
        Category name or None if skill is at root level
    """
    try:
        # Get path relative to skills directory
        parts = skill_path.relative_to(SKILLS_DIR).parts
    except ValueError:
        # Path is not located under SKILLS_DIR
        return None

    # At least 3 parts (category/skill/SKILL.md) are needed for a category
    if len(parts) >= 3:
        return parts[0]
    return None


def _estimate_tokens(content: str) -> int:
    """
    Rough token estimate (4 chars per token average).

    Args:
        content: Text content

    Returns:
        Estimated token count
    """
    return len(content) // 4


def _parse_tags(tags_value: str) -> List[str]:
    """
    Parse tags from frontmatter value.

    Handles both:
    - YAML list format: [tag1, tag2]
    - Comma-separated: tag1, tag2

    Args:
        tags_value: Raw tags string from frontmatter

    Returns:
        List of tag strings with surrounding whitespace and quotes removed
    """
    if not tags_value:
        return []

    # Remove brackets if present
    tags_value = tags_value.strip()
    if tags_value.startswith('[') and tags_value.endswith(']'):
        tags_value = tags_value[1:-1]

    # Split by comma and clean up
    return [t.strip().strip('"\'') for t in tags_value.split(',') if t.strip()]


def _is_inside_skill_dir(md_file: Path, skill_rel_dirs: set) -> bool:
    """
    Check whether a markdown file belongs to a directory-based skill.

    Comparison is done on whole path components, so a skill at ``mlops/ax``
    does not swallow files located in ``mlops/axolotl``.

    Args:
        md_file: Markdown file found below SKILLS_DIR
        skill_rel_dirs: Skill directories as paths relative to SKILLS_DIR

    Returns:
        True if the file's directory is a skill directory or nested in one
    """
    rel_parent = md_file.parent.relative_to(SKILLS_DIR)
    if rel_parent in skill_rel_dirs:
        return True
    # Path('.') (a SKILL.md directly in SKILLS_DIR) only claims root-level
    # files, not the whole tree, hence the `anc.parts` filter.
    return any(anc in skill_rel_dirs for anc in rel_parent.parents if anc.parts)


def _find_all_skills() -> List[Dict[str, Any]]:
    """
    Recursively find all skills in the skills directory.

    Returns minimal metadata for progressive disclosure (tier 1):
    - name (≤64 chars)
    - description (≤1024 chars)
    - category

    Skills can be:
    1. Directories containing SKILL.md (preferred)
    2. Flat .md files (legacy support)

    Files that cannot be read or decoded are skipped.

    Returns:
        List of skill metadata dicts with keys name, description, category
    """
    skills: List[Dict[str, Any]] = []

    if not SKILLS_DIR.exists():
        return skills

    # Directories holding a SKILL.md, relative to SKILLS_DIR; used below to
    # keep their supporting .md files out of the legacy flat-file search.
    skill_rel_dirs = set()

    # Find all SKILL.md files recursively
    for skill_md in SKILLS_DIR.rglob("SKILL.md"):
        if _is_in_ignored_dir(skill_md):
            continue

        skill_dir = skill_md.parent
        skill_rel_dirs.add(skill_dir.relative_to(SKILLS_DIR))

        try:
            content = skill_md.read_text(encoding='utf-8')
        except (OSError, ValueError):
            # Unreadable or not valid UTF-8 - skip this skill
            continue

        frontmatter, body = _parse_frontmatter(content)

        # Minimal entry for list - full details in skill_view()
        skills.append({
            # Fall back to the directory name when 'name' is missing or empty
            "name": (frontmatter.get('name') or skill_dir.name)[:MAX_NAME_LENGTH],
            "description": _extract_description(frontmatter, body),
            "category": _get_category_from_path(skill_md),
        })

    # Also find flat .md files at any level (legacy support)
    for md_file in SKILLS_DIR.rglob("*.md"):
        file_name = md_file.name
        # SKILL.md files were handled above; the others are not skills at all
        if file_name == "SKILL.md" or file_name in _NON_SKILL_FILENAMES:
            continue
        if file_name.startswith('_'):
            continue
        if _is_in_ignored_dir(md_file):
            continue
        # Files inside skill directories are references, not standalone skills
        if _is_inside_skill_dir(md_file, skill_rel_dirs):
            continue

        try:
            content = md_file.read_text(encoding='utf-8')
        except (OSError, ValueError):
            continue

        frontmatter, body = _parse_frontmatter(content)

        # Category is the top-level folder, if the file is not at the root
        rel_parts = md_file.relative_to(SKILLS_DIR).parts
        category = rel_parts[0] if len(rel_parts) > 1 else None

        skills.append({
            "name": (frontmatter.get('name') or md_file.stem)[:MAX_NAME_LENGTH],
            "description": _extract_description(frontmatter, body),
            "category": category,
        })

    return skills


def _load_category_description(category_dir: Path) -> Optional[str]:
    """
    Load category description from DESCRIPTION.md if it exists.

    Args:
        category_dir: Path to the category directory

    Returns:
        Description string (frontmatter ``description`` or first non-header
        line, truncated to MAX_DESCRIPTION_LENGTH), or None if the file is
        missing, unreadable, or contains no description
    """
    desc_file = category_dir / "DESCRIPTION.md"
    if not desc_file.exists():
        return None

    try:
        content = desc_file.read_text(encoding='utf-8')
    except (OSError, ValueError):
        return None

    frontmatter, body = _parse_frontmatter(content)
    description = _extract_description(frontmatter, body)
    return description if description else None


def skills_categories(verbose: bool = False, task_id: str = None) -> str:
    """
    List available skill categories with descriptions (progressive disclosure tier 0).

    Returns category names and descriptions for efficient discovery before
    drilling down. Categories can have a DESCRIPTION.md file with a
    description frontmatter field or first paragraph to explain what skills
    are in that category.

    Args:
        verbose: If True, include skill counts per category (default: False,
            but currently always included)
        task_id: Optional task identifier (unused, for API consistency)

    Returns:
        JSON string with list of categories and their descriptions
    """
    try:
        if not SKILLS_DIR.exists():
            return json.dumps({
                "success": True,
                "categories": [],
                "message": "No skills directory found."
            }, ensure_ascii=False)

        # One pass over all SKILL.md files: a category is a top-level
        # directory containing skills, and the count matches what
        # skills_list(category=...) would return for it.
        skill_counts: Dict[str, int] = {}
        for skill_md in SKILLS_DIR.rglob("SKILL.md"):
            if _is_in_ignored_dir(skill_md):
                continue
            category = _get_category_from_path(skill_md)
            if category:
                skill_counts[category] = skill_counts.get(category, 0) + 1

        # Build category list with descriptions
        categories = []
        for cat_name in sorted(skill_counts):
            cat_entry = {
                "name": cat_name,
                "skill_count": skill_counts[cat_name]
            }
            description = _load_category_description(SKILLS_DIR / cat_name)
            if description:
                cat_entry["description"] = description
            categories.append(cat_entry)

        return json.dumps({
            "success": True,
            "categories": categories,
            "hint": "If a category is relevant to your task, use skills_list with that category to see available skills"
        }, ensure_ascii=False)

    except Exception as e:
        # Tool boundary: report the failure as JSON instead of raising
        return json.dumps({
            "success": False,
            "error": str(e)
        }, ensure_ascii=False)


def skills_list(category: str = None, task_id: str = None) -> str:
    """
    List all available skills (progressive disclosure tier 1 - minimal metadata).

    Returns only name + description to minimize token usage. Use skill_view()
    to load full content, tags, related files, etc.

    Note: if the skills directory does not exist yet, it is created.

    Args:
        category: Optional category filter (e.g., "mlops")
        task_id: Optional task identifier (unused, for API consistency)

    Returns:
        JSON string with minimal skill info: name, description, category
    """
    try:
        # Ensure skills directory exists
        if not SKILLS_DIR.exists():
            SKILLS_DIR.mkdir(parents=True, exist_ok=True)
            return json.dumps({
                "success": True,
                "skills": [],
                "categories": [],
                "message": "Skills directory created. No skills available yet."
            }, ensure_ascii=False)

        # Find all skills
        all_skills = _find_all_skills()

        if not all_skills:
            return json.dumps({
                "success": True,
                "skills": [],
                "categories": [],
                "message": "No skills found in skills/ directory."
            }, ensure_ascii=False)

        # Filter by category if specified
        if category:
            all_skills = [s for s in all_skills if s.get("category") == category]

        # Sort by category then name (uncategorized skills first)
        all_skills.sort(key=lambda s: (s.get("category") or "", s["name"]))

        # Extract unique categories
        categories = sorted({s["category"] for s in all_skills if s.get("category")})

        return json.dumps({
            "success": True,
            "skills": all_skills,
            "categories": categories,
            "count": len(all_skills),
            "hint": "Use skill_view(name) to see full content, tags, and linked files"
        }, ensure_ascii=False)

    except Exception as e:
        # Tool boundary: report the failure as JSON instead of raising
        return json.dumps({
            "success": False,
            "error": str(e)
        }, ensure_ascii=False)


def _resolve_skill(name: str) -> Tuple[Optional[Path], Optional[Path]]:
    """
    Locate a skill by path or name below SKILLS_DIR.

    Lookup order:
    1. ``name`` as a directory path containing SKILL.md
    2. ``name`` as a legacy flat file (``<name>.md``)
    3. Any skill directory whose directory name equals ``name``
    4. Any flat ``<name>.md`` file
    5. Any SKILL.md whose frontmatter ``name`` equals ``name``

    Args:
        name: Skill name or path relative to SKILLS_DIR

    Returns:
        Tuple (skill_dir, skill_md). skill_dir is None for legacy flat-file
        skills; both are None when nothing matches or when ``name`` would
        resolve outside SKILLS_DIR.
    """
    if not name:
        return None, None

    # Try direct path first (e.g., "03-fine-tuning/axolotl"). Names that
    # escape the skills directory ('..', absolute paths) are rejected.
    direct_path = SKILLS_DIR / name
    if not _is_within(direct_path, SKILLS_DIR):
        return None, None

    if direct_path.is_dir() and (direct_path / "SKILL.md").exists():
        return direct_path, direct_path / "SKILL.md"

    # Legacy flat file. Appending '.md' keeps dotted names such as
    # 'v1.0-guide' intact; with_suffix() is kept for names that already
    # carry an extension.
    flat_candidates = (
        direct_path.with_name(direct_path.name + ".md"),
        direct_path.with_suffix('.md'),
    )
    for candidate in flat_candidates:
        if candidate.is_file() and _is_within(candidate, SKILLS_DIR):
            if candidate.name == "SKILL.md":
                return candidate.parent, candidate
            return None, candidate

    # Search for skill by directory name
    for found_skill_md in SKILLS_DIR.rglob("SKILL.md"):
        if found_skill_md.parent.name == name and not _is_in_ignored_dir(found_skill_md):
            return found_skill_md.parent, found_skill_md

    # Also check flat .md files
    for found_md in SKILLS_DIR.rglob(f"{name}.md"):
        if found_md.name != "SKILL.md" and not _is_in_ignored_dir(found_md):
            return None, found_md

    # Last resort: skills_list() reports the frontmatter name, which may
    # differ from the directory name.
    for found_skill_md in SKILLS_DIR.rglob("SKILL.md"):
        if _is_in_ignored_dir(found_skill_md):
            continue
        try:
            frontmatter, _ = _parse_frontmatter(found_skill_md.read_text(encoding='utf-8'))
        except (OSError, ValueError):
            continue
        if frontmatter.get('name', '')[:MAX_NAME_LENGTH] == name:
            return found_skill_md.parent, found_skill_md

    return None, None


def _list_available_files(skill_dir: Path) -> Dict[str, List[str]]:
    """
    List the files of a skill directory grouped by type.

    Args:
        skill_dir: Directory of the skill

    Returns:
        Dict with the non-empty groups among "references", "templates",
        "scripts" and "other"; values are paths relative to skill_dir.
        SKILL.md itself is never listed, and "other" only contains files with
        a known text suffix.
    """
    available_files: Dict[str, List[str]] = {
        "references": [],
        "templates": [],
        "scripts": [],
        "other": []
    }

    for f in skill_dir.rglob("*"):
        if not f.is_file() or f.name == "SKILL.md":
            continue
        rel_path = f.relative_to(skill_dir)
        # Group by the first directory component, if the file is in one
        top_dir = rel_path.parts[0] if len(rel_path.parts) > 1 else None
        if top_dir in ("references", "templates", "scripts"):
            available_files[top_dir].append(str(rel_path))
        elif f.suffix in _TEXT_FILE_SUFFIXES:
            available_files["other"].append(str(rel_path))

    # Remove empty categories
    return {k: v for k, v in available_files.items() if v}


def _collect_linked_files(skill_dir: Path) -> Dict[str, List[str]]:
    """
    Collect the reference, template and script files of a skill.

    Args:
        skill_dir: Directory of the skill

    Returns:
        Dict with the non-empty groups among "references" (top-level *.md in
        references/), "templates" (text files anywhere under templates/) and
        "scripts" (top-level script files in scripts/); values are paths
        relative to skill_dir.
    """
    reference_files: List[str] = []
    template_files: List[str] = []
    script_files: List[str] = []

    # References (documentation)
    references_dir = skill_dir / "references"
    if references_dir.exists():
        reference_files = [str(f.relative_to(skill_dir)) for f in references_dir.glob("*.md")]

    # Templates (output formats, boilerplate)
    templates_dir = skill_dir / "templates"
    if templates_dir.exists():
        for suffix in _TEXT_FILE_SUFFIXES:
            template_files.extend(
                str(f.relative_to(skill_dir)) for f in templates_dir.rglob('*' + suffix)
            )

    # Scripts (executable helpers)
    scripts_dir = skill_dir / "scripts"
    if scripts_dir.exists():
        for pattern in _SCRIPT_PATTERNS:
            script_files.extend(
                str(f.relative_to(skill_dir)) for f in scripts_dir.glob(pattern)
            )

    # Build linked files structure for clear discovery
    linked_files: Dict[str, List[str]] = {}
    if reference_files:
        linked_files["references"] = reference_files
    if template_files:
        linked_files["templates"] = template_files
    if script_files:
        linked_files["scripts"] = script_files
    return linked_files


def _view_linked_file(name: str, skill_dir: Path, file_path: str) -> str:
    """
    Build the skill_view() response for a file inside a skill directory.

    Args:
        name: Skill name as passed by the caller (echoed in the response)
        skill_dir: Directory of the skill
        file_path: Requested path, relative to skill_dir

    Returns:
        JSON string with the file content, a binary-file placeholder, or an
        error (path outside the skill / file not found)
    """
    target_file = skill_dir / file_path

    # file_path comes from the tool caller: never read outside the skill
    if not _is_within(target_file, skill_dir):
        return json.dumps({
            "success": False,
            "error": f"File path '{file_path}' is outside skill '{name}'.",
            "hint": "Use a path relative to the skill directory, e.g. 'references/api.md'"
        }, ensure_ascii=False)

    if not target_file.is_file():
        return json.dumps({
            "success": False,
            "error": f"File '{file_path}' not found in skill '{name}'.",
            "available_files": _list_available_files(skill_dir),
            "hint": "Use one of the available file paths listed above"
        }, ensure_ascii=False)

    # Read the file content
    try:
        content = target_file.read_text(encoding='utf-8')
    except UnicodeDecodeError:
        # Binary file - return info about it instead
        return json.dumps({
            "success": True,
            "name": name,
            "file": file_path,
            "content": f"[Binary file: {target_file.name}, size: {target_file.stat().st_size} bytes]",
            "is_binary": True
        }, ensure_ascii=False)

    return json.dumps({
        "success": True,
        "name": name,
        "file": file_path,
        "content": content,
        "file_type": target_file.suffix
    }, ensure_ascii=False)


def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
    """
    View the content of a skill or a specific file within a skill directory.

    Both ``name`` and ``file_path`` are confined to the skills directory;
    paths that would escape it are reported as errors.

    Args:
        name: Name or path of the skill (e.g., "axolotl" or "03-fine-tuning/axolotl")
        file_path: Optional path to a specific file within the skill
            (e.g., "references/api.md"). Ignored for legacy flat-file skills,
            which have no directory of their own.
        task_id: Optional task identifier (unused, for API consistency)

    Returns:
        JSON string with skill content or error message
    """
    try:
        if not SKILLS_DIR.exists():
            return json.dumps({
                "success": False,
                "error": "Skills directory does not exist."
            }, ensure_ascii=False)

        # Find the skill
        skill_dir, skill_md = _resolve_skill(name)

        if not skill_md or not skill_md.exists():
            # List available skills in error message (limit to 20)
            available = [s["name"] for s in _find_all_skills()[:20]]
            return json.dumps({
                "success": False,
                "error": f"Skill '{name}' not found.",
                "available_skills": available,
                "hint": "Use skills_list to see all available skills"
            }, ensure_ascii=False)

        # If a specific file path is requested, read that instead
        if file_path and skill_dir:
            return _view_linked_file(name, skill_dir, file_path)

        # Read the main skill content
        content = skill_md.read_text(encoding='utf-8')
        frontmatter, _ = _parse_frontmatter(content)

        # Linked files only exist for directory-based skills
        linked_files = _collect_linked_files(skill_dir) if skill_dir else {}

        return json.dumps({
            "success": True,
            "name": frontmatter.get('name', skill_md.stem if not skill_dir else skill_dir.name),
            "description": frontmatter.get('description', ''),
            "tags": _parse_tags(frontmatter.get('tags', '')),
            "related_skills": _parse_tags(frontmatter.get('related_skills', '')),
            "content": content,
            "path": str(skill_md.relative_to(SKILLS_DIR)),
            "linked_files": linked_files if linked_files else None,
            "usage_hint": "To view linked files, call skill_view(name, file_path) where file_path is e.g. 'references/api.md' or 'templates/config.yaml'" if linked_files else None
        }, ensure_ascii=False)

    except Exception as e:
        # Tool boundary: report the failure as JSON instead of raising
        return json.dumps({
            "success": False,
            "error": str(e)
        }, ensure_ascii=False)


# Tool description for model_tools.py
SKILLS_TOOL_DESCRIPTION = """Access skill documents providing specialized instructions, guidelines, and executable knowledge.

Progressive disclosure workflow:
1. skills_list() - Returns minimal metadata (name, description, category) for all skills
2. skill_view(name) - Loads full SKILL.md content + shows available linked_files (references/templates/scripts)
3. skill_view(name, file_path) - Loads specific linked file (e.g., 'references/api.md', 'scripts/train.py')

Skills may include:
- references/: Additional documentation, API specs, examples
- templates/: Output formats, config files, boilerplate code
- scripts/: Executable helpers (Python, shell scripts)"""


if __name__ == "__main__":
    # Manual smoke test of the skills tool
    print("🎯 Skills Tool Test")
    print("=" * 60)

    # Test listing skills
    print("\n📋 Listing all skills:")
    result = json.loads(skills_list())
    if result["success"]:
        # 'count' is absent from the "no skills" responses
        print(f"Found {result.get('count', 0)} skills in {len(result.get('categories', []))} categories")
        print(f"Categories: {result.get('categories', [])}")
        print("\nFirst 10 skills:")
        for skill in result["skills"][:10]:
            cat = f"[{skill['category']}] " if skill.get('category') else ""
            print(f"  • {cat}{skill['name']}: {skill['description'][:60]}...")
    else:
        print(f"Error: {result['error']}")

    # Test viewing a skill
    print("\n📖 Viewing skill 'axolotl':")
    result = json.loads(skill_view("axolotl"))
    if result["success"]:
        print(f"Name: {result['name']}")
        print(f"Description: {result.get('description', 'N/A')[:100]}...")
        print(f"Content length: {len(result['content'])} chars")
        linked = result.get('linked_files') or {}
        if linked.get('references'):
            print(f"Reference files: {linked['references']}")
    else:
        print(f"Error: {result['error']}")

    # Test viewing a reference file
    print("\n📄 Viewing reference file 'axolotl/references/dataset-formats.md':")
    result = json.loads(skill_view("axolotl", "references/dataset-formats.md"))
    if result["success"]:
        print(f"File: {result['file']}")
        print(f"Content length: {len(result['content'])} chars")
        print(f"Preview: {result['content'][:150]}...")
    else:
        print(f"Error: {result['error']}")