#!/usr/bin/env python3
"""
Skill validation framework for Ezra.
Validates SKILL.md files for completeness, structure, and quality.

Epic: EZRA-SELF-001 / Phase 3 - Skill System Enhancement
Author: Ezra (self-improvement)
"""
|
||
|
||
import re
import sys
from pathlib import Path
from typing import Optional

import yaml
|
||
|
||
|
||
class SkillValidationError:
    """A single validation finding.

    Despite the name this is a plain record, not an exception: it is
    collected in a list and rendered with ``repr()``.

    Attributes:
        level: Severity label; "ERROR", "WARNING" and "INFO" get an icon.
        message: Human-readable description of the finding.
        field: Area the finding belongs to (e.g. "frontmatter"); may be "".
    """

    # Icon shown in front of each known severity level.
    _ICONS = {"ERROR": "❌", "WARNING": "⚠️", "INFO": "ℹ️"}

    def __init__(self, level: str, message: str, field: str = ""):
        self.level = level  # ERROR, WARNING, INFO
        self.message = message
        self.field = field

    def __repr__(self):
        """Render as ``<icon> <LEVEL>[ [field]]: <message>``."""
        # Unknown levels fall back to "?" instead of raising.
        prefix = self._ICONS.get(self.level, "?")
        field_str = f" [{self.field}]" if self.field else ""
        return f"{prefix} {self.level}{field_str}: {self.message}"
|
||
|
||
|
||
class SkillValidator:
    """Validate SKILL.md files for quality and completeness.

    Findings are accumulated in ``self.errors`` as ``SkillValidationError``
    objects. ``validate_file`` resets that list on every call, so one
    validator instance can be reused across files.
    """

    REQUIRED_FRONTMATTER = ["name", "description", "version"]
    RECOMMENDED_FRONTMATTER = ["author", "tags"]
    REQUIRED_SECTIONS = ["trigger", "steps"]
    RECOMMENDED_SECTIONS = ["pitfalls", "verification"]

    # Root scanned by validate_all() when no directory is given.
    DEFAULT_SKILLS_ROOT = "/root/wizards/ezra/home/skills"

    # Leading '---' ... '---' block. Shared by _parse_frontmatter and
    # _extract_body so both agree on where the frontmatter ends. The closing
    # fence must be a line of its own; an empty block and a file that ends
    # right after the closing fence are both accepted.
    _FRONTMATTER_RE = re.compile(
        r'\A---[ \t]*\r?\n(.*?)^---[ \t]*(?:\r?\n|\Z)',
        re.DOTALL | re.MULTILINE,
    )
    # Fenced code block (``` or ~~~), possibly indented; an unterminated
    # fence runs to the end of the text.
    _FENCE_RE = re.compile(
        r'^[ \t]*(`{3,}|~{3,})[^\n]*\n.*?(?:^[ \t]*\1[`~]*[ \t]*$|\Z)',
        re.DOTALL | re.MULTILINE,
    )
    # ATX markdown header line; group 1 is the header text.
    _HEADER_RE = re.compile(r'^#+[ \t]+(.+)$', re.MULTILINE)
    # Header wording that marks the section holding the numbered steps.
    _STEPS_HEADER_RE = re.compile(r'step|procedure|instructions', re.IGNORECASE)

    def __init__(self):
        self.errors = []

    def validate_file(self, path: Path) -> "list[SkillValidationError]":
        """Validate a single SKILL.md file.

        Args:
            path: Location of the SKILL.md file (anything ``Path`` accepts).

        Returns:
            The findings for this file (the same list as ``self.errors``).
            A missing, unreadable or empty file yields a single ERROR
            finding instead of raising.
        """
        self.errors = []
        path = Path(path)

        if not path.exists():
            self.errors.append(SkillValidationError("ERROR", f"File not found: {path}", "file"))
            return self.errors

        try:
            # Decode explicitly rather than with the locale default;
            # "utf-8-sig" also drops a leading BOM, which would otherwise
            # hide the frontmatter marker.
            content = path.read_text(encoding="utf-8-sig")
        except (OSError, UnicodeDecodeError) as exc:
            # Report instead of raising so one bad file (or a directory
            # path) cannot abort a validate_all() scan.
            self.errors.append(SkillValidationError("ERROR", f"Cannot read file: {exc}", "file"))
            return self.errors

        if not content.strip():
            self.errors.append(SkillValidationError("ERROR", "File is empty", "file"))
            return self.errors

        # Check YAML frontmatter
        frontmatter = self._parse_frontmatter(content)
        self._validate_frontmatter(frontmatter)

        # Check markdown body
        body = self._extract_body(content)
        self._validate_body(body)

        # Check directory structure
        self._validate_directory(path.parent)

        return self.errors

    def _parse_frontmatter(self, content: str) -> dict:
        """Extract the YAML frontmatter as a dict.

        Records an ERROR finding and returns ``{}`` when the frontmatter is
        missing, is not valid YAML, or is not a mapping. An empty
        frontmatter block returns ``{}`` without a finding of its own.
        """
        match = self._FRONTMATTER_RE.match(content)
        if not match:
            self.errors.append(SkillValidationError("ERROR", "No YAML frontmatter found (must start with ---)", "frontmatter"))
            return {}

        try:
            data = yaml.safe_load(match.group(1))
        except yaml.YAMLError as e:
            self.errors.append(SkillValidationError("ERROR", f"Invalid YAML: {e}", "frontmatter"))
            return {}

        if data is None:
            return {}
        if not isinstance(data, dict):
            # e.g. a bare list or scalar: say so rather than only reporting
            # every required field as missing.
            self.errors.append(SkillValidationError("ERROR", "Frontmatter must be a YAML mapping", "frontmatter"))
            return {}
        return data

    def _extract_body(self, content: str) -> str:
        """Return the markdown after the frontmatter.

        When there is no frontmatter block the whole content is the body.
        """
        match = self._FRONTMATTER_RE.match(content)
        return content[match.end():] if match else content

    def _validate_frontmatter(self, fm: dict) -> None:
        """Validate frontmatter fields, appending findings to ``self.errors``."""
        for field in self.REQUIRED_FRONTMATTER:
            if field not in fm:
                self.errors.append(SkillValidationError("ERROR", f"Missing required field: {field}", "frontmatter"))
            elif not fm[field]:
                self.errors.append(SkillValidationError("ERROR", f"Empty required field: {field}", "frontmatter"))

        for field in self.RECOMMENDED_FRONTMATTER:
            if field not in fm:
                self.errors.append(SkillValidationError("WARNING", f"Missing recommended field: {field}", "frontmatter"))

        # Name validation. An empty name is already reported above; skipping
        # it here avoids a second, misleading "Invalid name 'None'" finding.
        if fm.get("name"):
            name = str(fm["name"])
            if not re.match(r'^[a-z0-9][a-z0-9_-]*$', name):
                self.errors.append(SkillValidationError("ERROR", f"Invalid name '{name}': use lowercase, hyphens, underscores", "frontmatter"))
            if len(name) > 64:
                self.errors.append(SkillValidationError("ERROR", f"Name too long ({len(name)} chars, max 64)", "frontmatter"))

        # Description length
        if fm.get("description"):
            desc = str(fm["description"])
            if len(desc) < 10:
                self.errors.append(SkillValidationError("WARNING", "Description too short (< 10 chars)", "frontmatter"))
            if len(desc) > 200:
                self.errors.append(SkillValidationError("WARNING", "Description very long (> 200 chars)", "frontmatter"))

        # Version format: MAJOR.MINOR with an optional .PATCH
        if fm.get("version"):
            ver = str(fm["version"])
            if not re.match(r'^\d+\.\d+(\.\d+)?$', ver):
                self.errors.append(SkillValidationError("WARNING", f"Non-semver version: {ver}", "frontmatter"))

    def _validate_body(self, body: str) -> None:
        """Validate markdown body structure, appending findings to ``self.errors``."""
        # Headers and sections are located with fenced code removed, so a
        # "# comment" line inside a code block is not taken for a header and
        # does not cut the steps section short.
        structural = self._FENCE_RE.sub("", body)
        header_matches = list(self._HEADER_RE.finditer(structural))
        headers_lower = [m.group(1).lower().strip() for m in header_matches]

        for section in self.REQUIRED_SECTIONS:
            if not any(section.lower() in h for h in headers_lower):
                self.errors.append(SkillValidationError("ERROR", f"Missing required section: {section}", "body"))

        for section in self.RECOMMENDED_SECTIONS:
            if not any(section.lower() in h for h in headers_lower):
                self.errors.append(SkillValidationError("WARNING", f"Missing recommended section: {section}", "body"))

        # Check for numbered steps in the first steps-like section. The
        # section runs from its header to the next header of any level.
        for index, header in enumerate(header_matches):
            if not self._STEPS_HEADER_RE.search(header.group(1)):
                continue
            if index + 1 < len(header_matches):
                section_end = header_matches[index + 1].start()
            else:
                section_end = len(structural)
            steps_content = structural[header.end():section_end]
            numbered = re.findall(r'^\d+\.', steps_content, re.MULTILINE)
            if len(numbered) < 2:
                self.errors.append(SkillValidationError("WARNING", "Steps section has fewer than 2 numbered items", "body"))
            break

        # Check for code blocks (counted on the unmodified body)
        code_blocks = re.findall(r'```', body)
        if len(code_blocks) < 2:  # Need at least one pair
            self.errors.append(SkillValidationError("INFO", "No code blocks found — consider adding examples", "body"))

        # Content length check
        word_count = len(body.split())
        if word_count < 50:
            self.errors.append(SkillValidationError("WARNING", f"Very short body ({word_count} words)", "body"))

    def _validate_directory(self, skill_dir: Path) -> None:
        """Warn about subdirectories of *skill_dir* outside the standard set."""
        valid_subdirs = {"references", "templates", "scripts", "assets"}
        # Sorted so findings come out in the same order on every run.
        for child in sorted(skill_dir.iterdir()):
            if child.is_dir() and child.name not in valid_subdirs:
                self.errors.append(SkillValidationError("WARNING", f"Non-standard subdirectory: {child.name}/", "directory"))

    def validate_all(self, skills_root: Optional[Path] = None) -> dict:
        """Validate every SKILL.md found recursively under a root directory.

        Args:
            skills_root: Directory to scan; defaults to
                ``DEFAULT_SKILLS_ROOT`` when omitted.

        Returns:
            A dict mapping skill name to a summary with the keys ``path``,
            ``errors``, ``warnings``, ``info`` (counts) and ``findings``
            (rendered strings). The skill name is the name of the directory
            holding SKILL.md; when several skills share that name, each is
            keyed by its directory path relative to the root instead, so no
            result overwrites another.
        """
        skills_root = Path(skills_root or self.DEFAULT_SKILLS_ROOT)
        skill_files = sorted(skills_root.rglob("SKILL.md"))

        # Count leaf directory names to detect collisions up front.
        name_counts = {}
        for skill_md in skill_files:
            leaf = skill_md.parent.name
            name_counts[leaf] = name_counts.get(leaf, 0) + 1

        results = {}
        for skill_md in skill_files:
            skill_name = skill_md.parent.name
            if name_counts[skill_name] > 1:
                skill_name = skill_md.parent.relative_to(skills_root).as_posix()
            errors = self.validate_file(skill_md)
            results[skill_name] = {
                "path": str(skill_md),
                "errors": sum(1 for e in errors if e.level == "ERROR"),
                "warnings": sum(1 for e in errors if e.level == "WARNING"),
                "info": sum(1 for e in errors if e.level == "INFO"),
                "findings": [repr(e) for e in errors],
            }
        return results

    def format_report(self, results: dict) -> str:
        """Format validation results as a markdown report.

        Args:
            results: Mapping in the shape returned by ``validate_all``.

        Returns:
            The report text: overall totals, then one section per skill,
            ordered by error count and then warning count, highest first.
        """
        lines = [
            "# Skill Validation Report",
            f"**Skills scanned:** {len(results)}",
            "",
        ]

        total_errors = sum(r["errors"] for r in results.values())
        total_warnings = sum(r["warnings"] for r in results.values())

        lines.append(f"**Total:** {total_errors} errors, {total_warnings} warnings")
        lines.append("")

        # Sort by error count descending
        sorted_results = sorted(results.items(), key=lambda x: (x[1]["errors"], x[1]["warnings"]), reverse=True)

        for name, r in sorted_results:
            icon = "✅" if r["errors"] == 0 else "❌"
            lines.append(f"### {icon} {name}")
            if r["findings"]:
                for f in r["findings"]:
                    lines.append(f" {f}")
            else:
                lines.append(" No issues found")
            lines.append("")

        return "\n".join(lines)
|
||
|
||
|
||
if __name__ == "__main__":
    # Usage: script [SKILLS_ROOT]
    # With no argument, validate_all() falls back to its built-in default
    # root, exactly as before.
    v = SkillValidator()
    results = v.validate_all(sys.argv[1] if len(sys.argv) > 1 else None)
    print(v.format_report(results))
|