209 lines
8.5 KiB
Python
209 lines
8.5 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
"""
Skill validation framework for Ezra.

Validates SKILL.md files for completeness, structure, and quality.

Epic: EZRA-SELF-001 / Phase 3 - Skill System Enhancement
Author: Ezra (self-improvement)
"""
|
|||
|
|
|
|||
|
|
import re
|
|||
|
|
import yaml
|
|||
|
|
from pathlib import Path
|
|||
|
|
from typing import Optional
|
|||
|
|
|
|||
|
|
|
|||
|
|
class SkillValidationError:
    """A single validation finding.

    Despite the name this is a plain value object, not an exception; it is
    collected in a list rather than raised.

    Attributes:
        level: Severity, one of "ERROR", "WARNING" or "INFO" (stored upper-case).
        message: Human-readable description of the finding.
        field: Optional area the finding refers to (e.g. "frontmatter").
    """

    def __init__(self, level: str, message: str, field: str = ""):
        # Normalize the case: consumers compare against the exact strings
        # "ERROR" / "WARNING" / "INFO", so a lower-case level would otherwise
        # be silently dropped from counts and rendered with the "?" prefix.
        self.level = level.upper()
        self.message = message
        self.field = field

    def __repr__(self):
        """Return the display form: ``<icon> <LEVEL>[ [field]]: <message>``."""
        prefix = {"ERROR": "❌", "WARNING": "⚠️", "INFO": "ℹ️"}.get(self.level, "?")
        field_str = f" [{self.field}]" if self.field else ""
        return f"{prefix} {self.level}{field_str}: {self.message}"
|
|||
|
|
|
|||
|
|
|
|||
|
|
class SkillValidator:
    """Validate SKILL.md files for quality and completeness.

    A SKILL.md file is expected to start with a ``---``-delimited YAML
    frontmatter block followed by a Markdown body. Findings are accumulated
    in ``self.errors`` (reset on every ``validate_file`` call).
    """

    REQUIRED_FRONTMATTER = ["name", "description", "version"]
    RECOMMENDED_FRONTMATTER = ["author", "tags"]
    REQUIRED_SECTIONS = ["trigger", "steps"]
    RECOMMENDED_SECTIONS = ["pitfalls", "verification"]

    def __init__(self):
        self.errors = []

    def validate_file(self, path: Path) -> "list[SkillValidationError]":
        """Validate a single SKILL.md file.

        Args:
            path: Location of the SKILL.md file (anything ``Path`` accepts).

        Returns:
            The findings for this file. A missing, unreadable or empty file
            yields a single ERROR finding and no further checks are run.
        """
        self.errors = []
        path = Path(path)

        if not path.exists():
            self.errors.append(SkillValidationError("ERROR", f"File not found: {path}", "file"))
            return self.errors

        try:
            # "utf-8-sig" decodes plain UTF-8 and also drops a leading BOM,
            # which would otherwise hide the opening "---" from the regexes.
            content = path.read_text(encoding="utf-8-sig")
        except (OSError, UnicodeDecodeError) as exc:
            # Report instead of raising so one bad file (a directory named
            # SKILL.md, a permission problem, non-UTF-8 bytes) does not abort
            # a whole validate_all() scan.
            self.errors.append(SkillValidationError("ERROR", f"Cannot read file: {exc}", "file"))
            return self.errors

        if not content.strip():
            self.errors.append(SkillValidationError("ERROR", "File is empty", "file"))
            return self.errors

        # Check YAML frontmatter
        frontmatter = self._parse_frontmatter(content)
        self._validate_frontmatter(frontmatter)

        # Check markdown body
        body = self._extract_body(content)
        self._validate_body(body)

        # Check directory structure
        self._validate_directory(path.parent)

        return self.errors

    def _parse_frontmatter(self, content: str) -> dict:
        """Extract and parse the YAML frontmatter.

        Returns the parsed mapping, or ``{}`` when the frontmatter is absent,
        is not valid YAML (an ERROR finding is recorded in both cases) or
        does not parse to a mapping.
        """
        match = re.match(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
        if not match:
            self.errors.append(SkillValidationError("ERROR", "No YAML frontmatter found (must start with ---)", "frontmatter"))
            return {}
        try:
            data = yaml.safe_load(match.group(1))
        except yaml.YAMLError as e:
            self.errors.append(SkillValidationError("ERROR", f"Invalid YAML: {e}", "frontmatter"))
            return {}
        return data if isinstance(data, dict) else {}

    def _extract_body(self, content: str) -> str:
        """Return the Markdown body that follows the frontmatter.

        If no frontmatter block is recognised the whole content is returned.
        """
        match = re.match(r'^---\s*\n.*?\n---\s*\n(.*)', content, re.DOTALL)
        if match:
            return match.group(1)
        # Frontmatter whose closing "---" is the last line of the file (no
        # trailing newline): the body is empty, not the whole file.
        if re.match(r'^---\s*\n.*?\n---\s*\Z', content, re.DOTALL):
            return ""
        return content

    def _validate_frontmatter(self, fm: dict):
        """Validate frontmatter fields and record findings in ``self.errors``."""
        for field in self.REQUIRED_FRONTMATTER:
            if field not in fm:
                self.errors.append(SkillValidationError("ERROR", f"Missing required field: {field}", "frontmatter"))
            elif not fm[field]:
                self.errors.append(SkillValidationError("ERROR", f"Empty required field: {field}", "frontmatter"))

        for field in self.RECOMMENDED_FRONTMATTER:
            if field not in fm:
                self.errors.append(SkillValidationError("WARNING", f"Missing recommended field: {field}", "frontmatter"))

        # Name validation. Skipped for empty values: those are already
        # reported as "Empty required field" and str(None) would otherwise
        # add a misleading "Invalid name 'None'".
        if fm.get("name"):
            name = str(fm["name"])
            # fullmatch: "$" would tolerate a trailing newline (YAML block scalar).
            if not re.fullmatch(r'[a-z0-9][a-z0-9_-]*', name):
                self.errors.append(SkillValidationError("ERROR", f"Invalid name '{name}': use lowercase, hyphens, underscores", "frontmatter"))
            if len(name) > 64:
                self.errors.append(SkillValidationError("ERROR", f"Name too long ({len(name)} chars, max 64)", "frontmatter"))

        # Description length
        if fm.get("description"):
            desc = str(fm["description"])
            if len(desc) < 10:
                self.errors.append(SkillValidationError("WARNING", "Description too short (< 10 chars)", "frontmatter"))
            if len(desc) > 200:
                self.errors.append(SkillValidationError("WARNING", "Description very long (> 200 chars)", "frontmatter"))

        # Version format: MAJOR.MINOR with an optional .PATCH
        if fm.get("version"):
            ver = str(fm["version"])
            if not re.fullmatch(r'\d+\.\d+(\.\d+)?', ver):
                self.errors.append(SkillValidationError("WARNING", f"Non-semver version: {ver}", "frontmatter"))

    @staticmethod
    def _strip_fenced_code(body: str) -> str:
        """Return *body* without ``` fenced code blocks (fence lines included)."""
        kept = []
        in_fence = False
        for line in body.splitlines():
            if line.lstrip().startswith("```"):
                in_fence = not in_fence
                continue
            if not in_fence:
                kept.append(line)
        return "\n".join(kept)

    def _validate_body(self, body: str):
        """Validate Markdown body structure and record findings in ``self.errors``."""
        # Header and step detection must ignore fenced code: a shell comment
        # such as "# install deps" looks exactly like a Markdown header.
        prose = self._strip_fenced_code(body)

        headers = re.findall(r'^#+\s+(.+)$', prose, re.MULTILINE)
        headers_lower = [h.lower().strip() for h in headers]

        # Section names are matched as substrings ("steps" matches "Setup Steps").
        for section in self.REQUIRED_SECTIONS:
            if not any(section.lower() in h for h in headers_lower):
                self.errors.append(SkillValidationError("ERROR", f"Missing required section: {section}", "body"))

        for section in self.RECOMMENDED_SECTIONS:
            if not any(section.lower() in h for h in headers_lower):
                self.errors.append(SkillValidationError("WARNING", f"Missing recommended section: {section}", "body"))

        # Check for numbered steps. The header part is confined to a single
        # line ([^\n]) so the keyword must appear in the header itself, not in
        # the prose of an earlier section.
        steps_match = re.search(
            r'^#+[ \t]+[^\n]*?(?:step|procedure|instructions)[^\n]*\n(.*?)(?=\n#+\s|\Z)',
            prose,
            re.IGNORECASE | re.DOTALL | re.MULTILINE,
        )
        if steps_match:
            steps_content = steps_match.group(1)
            numbered = re.findall(r'^\d+\.', steps_content, re.MULTILINE)
            if len(numbered) < 2:
                self.errors.append(SkillValidationError("WARNING", "Steps section has fewer than 2 numbered items", "body"))

        # Check for code blocks
        code_blocks = re.findall(r'```', body)
        if len(code_blocks) < 2:  # Need at least one pair
            self.errors.append(SkillValidationError("INFO", "No code blocks found — consider adding examples", "body"))

        # Content length check
        word_count = len(body.split())
        if word_count < 50:
            self.errors.append(SkillValidationError("WARNING", f"Very short body ({word_count} words)", "body"))

    def _validate_directory(self, skill_dir: Path):
        """Warn about subdirectories of *skill_dir* outside the standard set."""
        valid_subdirs = {"references", "templates", "scripts", "assets"}
        # sorted(): iterdir() order is arbitrary; keep findings reproducible.
        for child in sorted(skill_dir.iterdir()):
            if child.is_dir() and child.name not in valid_subdirs:
                self.errors.append(SkillValidationError("WARNING", f"Non-standard subdirectory: {child.name}/", "directory"))

    def validate_all(self, skills_root: Optional[Path] = None) -> dict:
        """Validate every SKILL.md found recursively under a root directory.

        Args:
            skills_root: Directory to scan; defaults to
                ``/root/wizards/ezra/home/skills`` when omitted.

        Returns:
            Mapping of skill name to a dict with keys ``path``, ``errors``,
            ``warnings``, ``info`` (counts) and ``findings`` (display strings).
            The skill name is the directory containing SKILL.md; if that name
            is already taken by another skill, the root-relative path is used
            so neither entry is lost.
        """
        skills_root = Path(skills_root or "/root/wizards/ezra/home/skills")
        results = {}
        for skill_md in sorted(skills_root.rglob("SKILL.md")):
            skill_name = skill_md.parent.name
            if skill_name in results:
                # Same directory name under a different parent: do not
                # overwrite the earlier result.
                base = skill_md.parent.relative_to(skills_root).as_posix()
                skill_name = base
                suffix = 2
                while skill_name in results:
                    skill_name = f"{base} ({suffix})"
                    suffix += 1
            errors = self.validate_file(skill_md)
            results[skill_name] = {
                "path": str(skill_md),
                "errors": len([e for e in errors if e.level == "ERROR"]),
                "warnings": len([e for e in errors if e.level == "WARNING"]),
                "info": len([e for e in errors if e.level == "INFO"]),
                "findings": [repr(e) for e in errors],
            }
        return results

    def format_report(self, results: dict) -> str:
        """Format the mapping returned by ``validate_all`` as a Markdown report.

        Skills are listed with the most errors (then most warnings) first.
        """
        lines = [
            "# Skill Validation Report",
            f"**Skills scanned:** {len(results)}",
            "",
        ]

        total_errors = sum(r["errors"] for r in results.values())
        total_warnings = sum(r["warnings"] for r in results.values())

        lines.append(f"**Total:** {total_errors} errors, {total_warnings} warnings")
        lines.append("")

        # Sort by error count descending
        sorted_results = sorted(results.items(), key=lambda x: (x[1]["errors"], x[1]["warnings"]), reverse=True)

        for name, r in sorted_results:
            icon = "✅" if r["errors"] == 0 else "❌"
            lines.append(f"### {icon} {name}")
            if r["findings"]:
                for finding in r["findings"]:
                    lines.append(f" {finding}")
            else:
                lines.append(" No issues found")
            lines.append("")

        return "\n".join(lines)
|
|||
|
|
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    import sys

    # Optional first CLI argument selects the skills root; with no argument
    # validate_all() falls back to its built-in default directory.
    root = sys.argv[1] if len(sys.argv) > 1 else None

    v = SkillValidator()
    results = v.validate_all(root)
    print(v.format_report(results))
|