Files
hermes-agent/tools/skills_tool.py

702 lines
25 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
Skills Tool Module
This module provides tools for listing and viewing skill documents.
Skills are organized as directories containing a SKILL.md file (the main instructions)
and optional supporting files like references, templates, and examples.
Inspired by Anthropic's Claude Skills system with progressive disclosure architecture:
- Metadata (name 64 chars, description 1024 chars) - shown in skills_list
- Full Instructions - loaded via skill_view when needed
- Linked Files (references, templates) - loaded on demand
Directory Structure:
    skills/
        my-skill/
            SKILL.md            # Main instructions (required)
            references/         # Supporting documentation
                api.md
                examples.md
            templates/          # Templates for output
                template.md
        category/               # Category folder for organization
            another-skill/
                SKILL.md
SKILL.md Format (YAML Frontmatter):
    ---
    name: skill-name                # Required, max 64 chars
    description: Brief description  # Required, max 1024 chars
    tags: [fine-tuning, llm]        # Optional, for filtering
    related_skills: [peft, lora]    # Optional, for composability
    version: 1.0.0                  # Optional, for tracking
    ---
    # Skill Title
    Full instructions and content here...
Available tools:
    - skills_categories: List skill categories with descriptions (progressive disclosure tier 0)
    - skills_list: List skills with metadata (progressive disclosure tier 1)
    - skill_view: Load full skill content (progressive disclosure tier 2-3)
Usage:
    from tools.skills_tool import skills_list, skill_view, check_skills_requirements

    # List all skills (returns metadata only - token efficient)
    result = skills_list()

    # View a skill's main content (loads full instructions)
    content = skill_view("axolotl")

    # View a reference file within a skill (loads linked file)
    content = skill_view("axolotl", "references/dataset-formats.md")
"""
import json
import os
import re
from pathlib import Path
from typing import Dict, Any, List, Optional, Tuple
# Length caps applied to skill metadata in listings
# (Anthropic-recommended limits for progressive disclosure efficiency)
MAX_NAME_LENGTH = 64
MAX_DESCRIPTION_LENGTH = 1024
# Default skills directory: the "skills" folder that sits beside the
# directory containing this file (i.e. relative to the repo root)
SKILLS_DIR = Path(__file__).parent.parent / "skills"
def check_skills_requirements() -> bool:
    """
    Report whether the skills tool can be used.

    Returns:
        bool: True when SKILLS_DIR is present on disk and is a directory,
        False otherwise.
    """
    # Path.is_dir() is already False for a missing path, so a single call
    # answers both "does it exist" and "is it a directory".
    return SKILLS_DIR.is_dir()
def _parse_frontmatter(content: str) -> Tuple[Dict[str, str], str]:
"""
Parse YAML frontmatter from markdown content.
Args:
content: Full markdown file content
Returns:
Tuple of (frontmatter dict, remaining content)
"""
frontmatter = {}
body = content
# Check for YAML frontmatter (starts with ---)
if content.startswith("---"):
# Find the closing ---
end_match = re.search(r'\n---\s*\n', content[3:])
if end_match:
yaml_content = content[3:end_match.start() + 3]
body = content[end_match.end() + 3:]
# Simple YAML parsing for key: value pairs
for line in yaml_content.strip().split('\n'):
if ':' in line:
key, value = line.split(':', 1)
frontmatter[key.strip()] = value.strip()
return frontmatter, body
def _get_category_from_path(skill_path: Path) -> Optional[str]:
    """
    Derive the category of a skill from where its SKILL.md lives.

    Example: skills/03-fine-tuning/axolotl/SKILL.md -> "03-fine-tuning"

    Args:
        skill_path: Path to SKILL.md file

    Returns:
        The first directory below SKILLS_DIR, or None when the skill sits
        directly at the root level or the path is not under SKILLS_DIR
    """
    try:
        relative_parts = skill_path.relative_to(SKILLS_DIR).parts
    except ValueError:
        # Path is not located below the skills directory
        return None

    # category/skill/SKILL.md has three components; anything shallower
    # has no category folder above the skill.
    return relative_parts[0] if len(relative_parts) >= 3 else None
def _estimate_tokens(content: str) -> int:
"""
Rough token estimate (4 chars per token average).
Args:
content: Text content
Returns:
Estimated token count
"""
return len(content) // 4
def _parse_tags(tags_value: str) -> List[str]:
"""
Parse tags from frontmatter value.
Handles both:
- YAML list format: [tag1, tag2]
- Comma-separated: tag1, tag2
Args:
tags_value: Raw tags string from frontmatter
Returns:
List of tag strings
"""
if not tags_value:
return []
# Remove brackets if present
tags_value = tags_value.strip()
if tags_value.startswith('[') and tags_value.endswith(']'):
tags_value = tags_value[1:-1]
# Split by comma and clean up
return [t.strip().strip('"\'') for t in tags_value.split(',') if t.strip()]
# Markdown files that are documentation or metadata, never standalone skills.
# DESCRIPTION.md is the per-category description file read by
# _load_category_description().
_NON_SKILL_MARKDOWN = frozenset({
    'README.md',
    'CONTRIBUTING.md',
    'CLAUDE.md',
    'LICENSE',
    'LICENSE.md',
    'DESCRIPTION.md',
})


def _in_hidden_repo_dir(rel_path: Path) -> bool:
    """Return True if any directory component of rel_path is .git or .github."""
    return any(part in ('.git', '.github') for part in rel_path.parts[:-1])


def _is_inside_skill_dir(dir_parts: Tuple[str, ...], skill_dir_parts: set) -> bool:
    """Return True if the directory (as path components) is a skill dir or lies below one."""
    if not dir_parts:
        # File sits directly in SKILLS_DIR: only a root-level skill claims it
        return () in skill_dir_parts
    return any(
        dir_parts[:depth] in skill_dir_parts
        for depth in range(1, len(dir_parts) + 1)
    )


def _skill_listing_entry(md_file: Path, fallback_name: str) -> Dict[str, Any]:
    """Read one markdown file and build its minimal listing entry (name, description)."""
    frontmatter, body = _parse_frontmatter(md_file.read_text(encoding='utf-8'))

    # Name from frontmatter, or the fallback when it is missing or empty
    name = (frontmatter.get('name') or fallback_name)[:MAX_NAME_LENGTH]

    # Description from frontmatter, or the first non-heading line of the body
    description = frontmatter.get('description', '')
    if not description:
        for line in body.strip().split('\n'):
            line = line.strip()
            if line and not line.startswith('#'):
                description = line
                break
    if len(description) > MAX_DESCRIPTION_LENGTH:
        description = description[:MAX_DESCRIPTION_LENGTH - 3] + "..."

    return {"name": name, "description": description}


def _find_all_skills() -> List[Dict[str, Any]]:
    """
    Recursively find all skills in the skills directory.

    Returns minimal metadata for progressive disclosure (tier 1); everything
    else (tags, linked files, full content) is served by skill_view():
    - name (truncated to MAX_NAME_LENGTH)
    - description (truncated to MAX_DESCRIPTION_LENGTH)
    - category (first directory below SKILLS_DIR, or None)

    Skills can be:
    1. Directories containing SKILL.md (preferred)
    2. Flat .md files (legacy support)

    Files that cannot be read or decoded are skipped.

    Returns:
        List of skill metadata dicts with keys name, description, category
    """
    skills: List[Dict[str, Any]] = []
    if not SKILLS_DIR.exists():
        return skills

    # Directory-based skills. Remember each skill directory (as path
    # components relative to SKILLS_DIR) so the legacy scan can ignore the
    # supporting files inside it.
    skill_dir_parts = set()
    for skill_md in SKILLS_DIR.rglob("SKILL.md"):
        rel_path = skill_md.relative_to(SKILLS_DIR)
        if _in_hidden_repo_dir(rel_path):
            continue
        # Registered even if reading fails below: the directory is still a
        # skill directory and its other files are not standalone skills.
        skill_dir_parts.add(rel_path.parts[:-1])
        try:
            entry = _skill_listing_entry(skill_md, skill_md.parent.name)
        except (OSError, UnicodeError):
            # Skip files that can't be read
            continue
        entry["category"] = _get_category_from_path(skill_md)
        skills.append(entry)

    # Flat .md files at any level (legacy support)
    for md_file in SKILLS_DIR.rglob("*.md"):
        file_name = md_file.name
        if (
            file_name == "SKILL.md"
            or file_name in _NON_SKILL_MARKDOWN
            or file_name.startswith('_')
        ):
            continue
        rel_path = md_file.relative_to(SKILLS_DIR)
        if _in_hidden_repo_dir(rel_path):
            continue
        # Files inside skill directories are references, not standalone skills.
        # Compared component-wise so that skill "foo" does not hide "foobar/".
        if _is_inside_skill_dir(rel_path.parts[:-1], skill_dir_parts):
            continue
        try:
            entry = _skill_listing_entry(md_file, md_file.stem)
        except (OSError, UnicodeError):
            continue
        # Category is the top-level directory, if the file is not at the root
        entry["category"] = rel_path.parts[0] if len(rel_path.parts) > 1 else None
        skills.append(entry)

    return skills
def _load_category_description(category_dir: Path) -> Optional[str]:
    """
    Read the description of a category from its DESCRIPTION.md, if present.

    The frontmatter ``description`` field wins; otherwise the first
    non-empty, non-heading line of the body is used. The result is capped at
    MAX_DESCRIPTION_LENGTH characters.

    Args:
        category_dir: Path to the category directory

    Returns:
        Description string, or None when the file is missing, unreadable,
        or yields no description
    """
    description_path = category_dir / "DESCRIPTION.md"
    if not description_path.exists():
        return None

    try:
        raw_text = description_path.read_text(encoding='utf-8')
        frontmatter, body = _parse_frontmatter(raw_text)

        summary = frontmatter.get('description', '')
        if not summary:
            # Fall back to the first body line that is neither blank nor a heading
            for candidate in map(str.strip, body.strip().split('\n')):
                if candidate and not candidate.startswith('#'):
                    summary = candidate
                    break

        # Truncate to reasonable length
        if len(summary) > MAX_DESCRIPTION_LENGTH:
            summary = summary[:MAX_DESCRIPTION_LENGTH - 3] + "..."

        return summary or None
    except Exception:
        return None
def skills_categories(verbose: bool = False, task_id: Optional[str] = None) -> str:
    """
    List available skill categories with descriptions (progressive disclosure tier 0).

    Returns category names and descriptions for efficient discovery before drilling down.
    Categories can have a DESCRIPTION.md file with a description frontmatter field
    or first paragraph to explain what skills are in that category.

    A category's skill_count is the number of SKILL.md files whose category
    (as computed by _get_category_from_path) is that category, so it agrees
    with what skills_list(category=...) returns for directory-based skills.
    SKILL.md files under .git/.github directories are ignored, matching the
    skill listing.

    Args:
        verbose: Currently unused; skill counts are always included
        task_id: Optional task identifier (unused, for API consistency)

    Returns:
        JSON string with list of categories and their descriptions
    """
    try:
        if not SKILLS_DIR.exists():
            return json.dumps({
                "success": True,
                "categories": [],
                "message": "No skills directory found."
            }, ensure_ascii=False)

        # Single pass over the tree: discover categories and count their skills
        skill_counts: Dict[str, int] = {}
        for skill_md in SKILLS_DIR.rglob("SKILL.md"):
            rel_parts = skill_md.relative_to(SKILLS_DIR).parts
            if any(part in ('.git', '.github') for part in rel_parts[:-1]):
                continue
            category = _get_category_from_path(skill_md)
            if category:
                skill_counts[category] = skill_counts.get(category, 0) + 1

        # Build category list with descriptions
        categories = []
        for name in sorted(skill_counts):
            cat_entry = {
                "name": name,
                "skill_count": skill_counts[name]
            }
            description = _load_category_description(SKILLS_DIR / name)
            if description:
                cat_entry["description"] = description
            categories.append(cat_entry)

        return json.dumps({
            "success": True,
            "categories": categories,
            "hint": "If a category is relevant to your task, use skills_list with that category to see available skills"
        }, ensure_ascii=False)
    except Exception as exc:
        # Tool boundary: report the failure as JSON instead of raising
        return json.dumps({
            "success": False,
            "error": str(exc)
        }, ensure_ascii=False)
def skills_list(category: Optional[str] = None, task_id: Optional[str] = None) -> str:
    """
    List all available skills (progressive disclosure tier 1 - minimal metadata).

    Returns only name + description + category to minimize token usage. Use
    skill_view() to load full content, tags, related files, etc.

    Side effect: if the skills directory does not exist it is created.

    Args:
        category: Optional category filter (e.g., "mlops")
        task_id: Optional task identifier (unused, for API consistency)

    Returns:
        JSON string. On success it always carries "skills", "categories" and
        "count"; on failure it carries "success": false and "error".
    """
    try:
        # Ensure skills directory exists
        if not SKILLS_DIR.exists():
            SKILLS_DIR.mkdir(parents=True, exist_ok=True)
            return json.dumps({
                "success": True,
                "skills": [],
                "categories": [],
                "count": 0,
                "message": "Skills directory created. No skills available yet."
            }, ensure_ascii=False)

        all_skills = _find_all_skills()
        if not all_skills:
            # "count" is included here as well so callers can read it
            # unconditionally on any successful response.
            return json.dumps({
                "success": True,
                "skills": [],
                "categories": [],
                "count": 0,
                "message": "No skills found in skills/ directory."
            }, ensure_ascii=False)

        # Filter by category if specified
        if category:
            all_skills = [s for s in all_skills if s.get("category") == category]

        # Sort by category then name (uncategorized skills sort first)
        all_skills.sort(key=lambda s: (s.get("category") or "", s["name"]))

        # Unique categories among the skills being returned
        categories = sorted({s["category"] for s in all_skills if s.get("category")})

        return json.dumps({
            "success": True,
            "skills": all_skills,
            "categories": categories,
            "count": len(all_skills),
            "hint": "Use skill_view(name) to see full content, tags, and linked files"
        }, ensure_ascii=False)
    except Exception as exc:
        # Tool boundary: report the failure as JSON instead of raising
        return json.dumps({
            "success": False,
            "error": str(exc)
        }, ensure_ascii=False)
def _is_safe_relative_path(raw_path: str) -> bool:
    """Return True when raw_path is relative (no root/drive) and has no '..' component."""
    candidate = Path(raw_path)
    return not candidate.anchor and ".." not in candidate.parts


def _resolve_skill(name: str) -> Tuple[Optional[Path], Optional[Path]]:
    """Locate a skill; returns (skill_dir, skill_md), skill_dir is None for flat files, both None if not found."""
    relative = Path(name)
    direct_path = SKILLS_DIR / relative

    # 1. Direct path to a skill directory (e.g., "03-fine-tuning/axolotl")
    if direct_path.is_dir() and (direct_path / "SKILL.md").exists():
        return direct_path, direct_path / "SKILL.md"

    if relative.parts:
        # 2. Legacy flat file addressed directly. ".md" is appended rather
        #    than swapped in with with_suffix(), which would turn a dotted
        #    name such as "v1.0" into "v1.md".
        flat_candidates = [direct_path.parent / (direct_path.name + ".md")]
        if direct_path.suffix == ".md":
            # The caller already spelled out the .md file name
            flat_candidates.append(direct_path)
        for flat_file in flat_candidates:
            if flat_file.is_file():
                return None, flat_file

    # 3. Skill directory with this name anywhere in the tree
    for found_skill_md in SKILLS_DIR.rglob("SKILL.md"):
        if found_skill_md.parent.name == name:
            return found_skill_md.parent, found_skill_md

    # 4. Flat "<name>.md" anywhere in the tree. Matched on trailing path
    #    components instead of interpolating name into a glob pattern, so
    #    glob metacharacters in name have no special meaning.
    if relative.parts:
        wanted = relative.parts[:-1] + (relative.name + ".md",)
        for found_md in SKILLS_DIR.rglob("*.md"):
            if found_md.name == "SKILL.md" or not found_md.is_file():
                continue
            if found_md.relative_to(SKILLS_DIR).parts[-len(wanted):] == wanted:
                return None, found_md

    # 5. Frontmatter name of a directory-based skill. skills_list() reports
    #    the frontmatter name, which may differ from the directory name.
    for found_skill_md in SKILLS_DIR.rglob("SKILL.md"):
        try:
            frontmatter, _ = _parse_frontmatter(found_skill_md.read_text(encoding='utf-8'))
        except (OSError, UnicodeError):
            continue
        declared = frontmatter.get('name', '')
        if declared and name in (declared, declared[:MAX_NAME_LENGTH]):
            return found_skill_md.parent, found_skill_md

    return None, None


def _list_skill_files(skill_dir: Path) -> Dict[str, List[str]]:
    """Group the files of a skill directory (except SKILL.md) by kind, dropping empty groups."""
    grouped: Dict[str, List[str]] = {
        "references": [],
        "templates": [],
        "scripts": [],
        "other": []
    }
    other_suffixes = ('.md', '.py', '.yaml', '.yml', '.json', '.tex', '.sh')
    for entry in skill_dir.rglob("*"):
        if not entry.is_file() or entry.name == "SKILL.md":
            continue
        rel = entry.relative_to(skill_dir)
        top_level = rel.parts[0] if len(rel.parts) > 1 else None
        if top_level in ("references", "templates", "scripts"):
            grouped[top_level].append(str(rel))
        elif entry.suffix in other_suffixes:
            grouped["other"].append(str(rel))
    return {kind: sorted(paths) for kind, paths in grouped.items() if paths}


def _collect_linked_files(skill_dir: Path) -> Dict[str, List[str]]:
    """Collect the reference, template and script files advertised for a skill, sorted per group."""
    def _relative(paths) -> List[str]:
        return sorted(str(p.relative_to(skill_dir)) for p in paths)

    linked: Dict[str, List[str]] = {}

    # References (documentation): markdown files directly in references/
    references_dir = skill_dir / "references"
    if references_dir.is_dir():
        references = _relative(references_dir.glob("*.md"))
        if references:
            linked["references"] = references

    # Templates (output formats, boilerplate): searched recursively
    templates_dir = skill_dir / "templates"
    if templates_dir.is_dir():
        found = []
        for pattern in ('*.md', '*.py', '*.yaml', '*.yml', '*.json', '*.tex', '*.sh'):
            found.extend(templates_dir.rglob(pattern))
        if found:
            linked["templates"] = _relative(found)

    # Scripts (executable helpers): files directly in scripts/
    scripts_dir = skill_dir / "scripts"
    if scripts_dir.is_dir():
        found = []
        for pattern in ('*.py', '*.sh', '*.bash', '*.js', '*.ts', '*.rb'):
            found.extend(scripts_dir.glob(pattern))
        if found:
            linked["scripts"] = _relative(found)

    return linked


def _view_linked_file(name: str, skill_dir: Path, file_path: str) -> str:
    """Return the JSON response for one file inside a skill directory."""
    # file_path comes from the tool caller; never let it leave the skill directory
    if not _is_safe_relative_path(file_path):
        return json.dumps({
            "success": False,
            "error": f"Invalid file path '{file_path}': must be a relative path inside the skill directory.",
            "hint": "Use a path such as 'references/api.md'"
        }, ensure_ascii=False)

    target_file = skill_dir / file_path
    if not target_file.is_file():
        return json.dumps({
            "success": False,
            "error": f"File '{file_path}' not found in skill '{name}'.",
            "available_files": _list_skill_files(skill_dir),
            "hint": "Use one of the available file paths listed above"
        }, ensure_ascii=False)

    try:
        content = target_file.read_text(encoding='utf-8')
    except UnicodeDecodeError:
        # Binary file - return info about it instead
        return json.dumps({
            "success": True,
            "name": name,
            "file": file_path,
            "content": f"[Binary file: {target_file.name}, size: {target_file.stat().st_size} bytes]",
            "is_binary": True
        }, ensure_ascii=False)

    return json.dumps({
        "success": True,
        "name": name,
        "file": file_path,
        "content": content,
        "file_type": target_file.suffix
    }, ensure_ascii=False)


def skill_view(name: str, file_path: Optional[str] = None, task_id: Optional[str] = None) -> str:
    """
    View the content of a skill or a specific file within a skill directory.

    The skill is looked up, in order: as a path relative to the skills
    directory, as a legacy flat "<name>.md" file, by directory name anywhere
    in the tree, by flat file name anywhere in the tree, and finally by the
    ``name`` field in a SKILL.md frontmatter.

    Both name and file_path must be relative paths without '..' components;
    anything else is rejected so that only files below the skills directory
    are read.

    Args:
        name: Name or path of the skill (e.g., "axolotl" or "03-fine-tuning/axolotl")
        file_path: Optional path to a specific file within the skill (e.g., "references/api.md")
        task_id: Optional task identifier (unused, for API consistency)

    Returns:
        JSON string with skill content or error message. Errors are reported
        in the JSON ("success": false) rather than raised.
    """
    try:
        if not SKILLS_DIR.exists():
            return json.dumps({
                "success": False,
                "error": "Skills directory does not exist."
            }, ensure_ascii=False)

        # Find the skill; unsafe or empty names are treated as "not found"
        skill_dir, skill_md = None, None
        if name and _is_safe_relative_path(name):
            skill_dir, skill_md = _resolve_skill(name)

        if skill_md is None or not skill_md.exists():
            # List available skills in error message (limit to 20)
            available = [s["name"] for s in _find_all_skills()[:20]]
            return json.dumps({
                "success": False,
                "error": f"Skill '{name}' not found.",
                "available_skills": available,
                "hint": "Use skills_list to see all available skills"
            }, ensure_ascii=False)

        # If a specific file path is requested, read that instead
        if file_path:
            if skill_dir is None:
                # Flat-file skills have no directory to hold linked files;
                # say so instead of silently returning the main content.
                return json.dumps({
                    "success": False,
                    "error": f"Skill '{name}' is a single-file skill and has no linked files.",
                    "hint": "Call skill_view(name) without file_path to read the skill itself"
                }, ensure_ascii=False)
            return _view_linked_file(name, skill_dir, file_path)

        # Read the main skill content
        content = skill_md.read_text(encoding='utf-8')
        frontmatter, _ = _parse_frontmatter(content)

        # Linked files only exist for directory-based skills
        linked_files = _collect_linked_files(skill_dir) if skill_dir else {}

        return json.dumps({
            "success": True,
            "name": frontmatter.get('name', skill_md.stem if not skill_dir else skill_dir.name),
            "description": frontmatter.get('description', ''),
            "tags": _parse_tags(frontmatter.get('tags', '')),
            "related_skills": _parse_tags(frontmatter.get('related_skills', '')),
            "content": content,
            "path": str(skill_md.relative_to(SKILLS_DIR)),
            "linked_files": linked_files if linked_files else None,
            "usage_hint": "To view linked files, call skill_view(name, file_path) where file_path is e.g. 'references/api.md' or 'templates/config.yaml'" if linked_files else None
        }, ensure_ascii=False)
    except Exception as exc:
        # Tool boundary: report the failure as JSON instead of raising
        return json.dumps({
            "success": False,
            "error": str(exc)
        }, ensure_ascii=False)
# Tool description for model_tools.py
# Step 1 lists exactly the fields skills_list() returns (name, description,
# category); tags and linked files are only available through skill_view().
SKILLS_TOOL_DESCRIPTION = """Access skill documents providing specialized instructions, guidelines, and executable knowledge.
Progressive disclosure workflow:
1. skills_list() - Returns minimal metadata (name, description, category) for all skills
2. skill_view(name) - Loads full SKILL.md content + shows available linked_files (references/templates/scripts)
3. skill_view(name, file_path) - Loads specific linked file (e.g., 'references/api.md', 'scripts/train.py')
Skills may include:
- references/: Additional documentation, API specs, examples
- templates/: Output formats, config files, boilerplate code
- scripts/: Executable helpers (Python, shell scripts)"""
if __name__ == "__main__":
    # Manual smoke test of the skills tool: list skills, view one skill,
    # then view one of its reference files. Output goes to stdout.
    print("🎯 Skills Tool Test")
    print("=" * 60)

    # Test listing skills
    print("\n📋 Listing all skills:")
    result = json.loads(skills_list())
    if result["success"]:
        listed_skills = result.get("skills", [])
        listed_categories = result.get("categories", [])
        # Fall back to the list length if the response carries no "count"
        print(f"Found {result.get('count', len(listed_skills))} skills in {len(listed_categories)} categories")
        print(f"Categories: {listed_categories}")
        print("\nFirst 10 skills:")
        for skill in listed_skills[:10]:
            cat = f"[{skill['category']}] " if skill.get('category') else ""
            print(f"{cat}{skill['name']}: {skill['description'][:60]}...")
    else:
        print(f"Error: {result['error']}")

    # Test viewing a skill
    print("\n📖 Viewing skill 'axolotl':")
    result = json.loads(skill_view("axolotl"))
    if result["success"]:
        print(f"Name: {result['name']}")
        print(f"Description: {result.get('description', 'N/A')[:100]}...")
        print(f"Content length: {len(result['content'])} chars")
        # skill_view reports linked files under "linked_files" (may be None)
        linked = result.get('linked_files') or {}
        if linked.get('references'):
            print(f"Reference files: {linked['references']}")
    else:
        print(f"Error: {result['error']}")

    # Test viewing a reference file
    print("\n📄 Viewing reference file 'axolotl/references/dataset-formats.md':")
    result = json.loads(skill_view("axolotl", "references/dataset-formats.md"))
    if result["success"]:
        print(f"File: {result['file']}")
        print(f"Content length: {len(result['content'])} chars")
        print(f"Preview: {result['content'][:150]}...")
    else:
        print(f"Error: {result['error']}")