Files
ezra-environment/tools/skill_validator.py

209 lines
8.5 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
Skill validation framework for Ezra.
Validates SKILL.md files for completeness, structure, and quality.
Epic: EZRA-SELF-001 / Phase 3 - Skill System Enhancement
Author: Ezra (self-improvement)
"""
import re
import yaml
from pathlib import Path
from typing import Optional
class SkillValidationError:
    """A single validation finding.

    Despite the name, this class does not derive from ``Exception``; it is a
    plain record with three attributes:

    * ``level``   -- severity label: ``"ERROR"``, ``"WARNING"`` or ``"INFO"``.
    * ``message`` -- human-readable description of the finding.
    * ``field``   -- optional tag naming the part of the skill the finding
      refers to; an empty string means "no tag".
    """

    # Marker rendered in front of each known severity.  ERROR and INFO used to
    # map to empty strings, which left a stray leading space in the rendered
    # finding and no visual marker for those levels.
    _LEVEL_PREFIX = {"ERROR": "❌", "WARNING": "⚠️", "INFO": "ℹ️"}

    def __init__(self, level: str, message: str, field: str = ""):
        self.level = level  # ERROR, WARNING, INFO
        self.message = message
        self.field = field

    def __repr__(self):
        """Render as ``<marker> <LEVEL>[ [field]]: <message>``.

        Levels without a known marker are rendered with ``?`` as the marker.
        """
        prefix = self._LEVEL_PREFIX.get(self.level, "?")
        field_str = f" [{self.field}]" if self.field else ""
        return f"{prefix} {self.level}{field_str}: {self.message}"
class SkillValidator:
    """Validate SKILL.md files for quality and completeness.

    A SKILL.md file is expected to start with a ``---``-delimited YAML
    frontmatter block followed by a markdown body.  Problems are collected as
    ``SkillValidationError`` records in ``self.errors``; validation problems
    are reported as findings, not raised.
    """

    REQUIRED_FRONTMATTER = ["name", "description", "version"]
    RECOMMENDED_FRONTMATTER = ["author", "tags"]
    REQUIRED_SECTIONS = ["trigger", "steps"]
    RECOMMENDED_SECTIONS = ["pitfalls", "verification"]

    # Directory scanned by validate_all() when the caller passes no root.
    DEFAULT_SKILLS_ROOT = "/root/wizards/ezra/home/skills"

    # One shared pattern for both parsing the frontmatter and locating the
    # body, so the two can never disagree.  The closing ``---`` may be the
    # very last thing in the file (no trailing newline required).
    _FRONTMATTER_RE = re.compile(r'^---\s*\n(.*?)\n---(?:\s*\n|\s*\Z)', re.DOTALL)
    # A fenced code block: from a line starting with ``` to the next such line.
    _FENCED_CODE_RE = re.compile(
        r'^[ \t]*```.*?^[ \t]*```[^\n]*$', re.DOTALL | re.MULTILINE
    )
    # A markdown header; whitespace after the #'s may not cross a line break.
    _HEADER_RE = re.compile(r'^#+[ \t]+(.+)$', re.MULTILINE)
    # A header line mentioning step/procedure/instructions, capturing the
    # section text up to the next header.  ``[^\n]*`` keeps the header part on
    # one line even though DOTALL is needed for the captured section.
    _STEPS_SECTION_RE = re.compile(
        r'^#+[ \t]+[^\n]*(?:step|procedure|instructions)[^\n]*(?:\n|\Z)'
        r'(.*?)(?=^#+\s|\Z)',
        re.IGNORECASE | re.DOTALL | re.MULTILINE,
    )

    def __init__(self):
        # Findings of the most recent validate_file() call.
        self.errors = []

    def _add(self, level: str, message: str, field: str) -> None:
        """Record one finding in ``self.errors``."""
        self.errors.append(SkillValidationError(level, message, field))

    @staticmethod
    def _is_blank(value) -> bool:
        """Return True for None, blank strings and empty containers.

        Other falsy values (``0``, ``0.0``, ``False``) count as present so
        that, e.g., ``version: 0.0`` is not reported as an empty field.
        """
        if value is None:
            return True
        if isinstance(value, str):
            return not value.strip()
        if isinstance(value, (list, tuple, dict, set)):
            return not value
        return False

    def validate_file(self, path: Path) -> "list[SkillValidationError]":
        """Validate a single SKILL.md file.

        Args:
            path: Location of the SKILL.md file (anything ``Path`` accepts).

        Returns:
            The list of findings, which is also stored in ``self.errors``.
            A fresh list is created on every call.
        """
        self.errors = []
        path = Path(path)
        if not path.exists():
            self._add("ERROR", f"File not found: {path}", "file")
            return self.errors
        try:
            # Explicit encoding keeps results independent of the locale;
            # the -sig variant also drops a BOM that would otherwise hide
            # the leading ``---`` from the frontmatter pattern.
            content = path.read_text(encoding="utf-8-sig")
        except (OSError, UnicodeDecodeError) as exc:
            # A directory, a permission problem or undecodable bytes becomes
            # a finding instead of aborting a whole validate_all() scan.
            self._add("ERROR", f"Cannot read file: {exc}", "file")
            return self.errors
        if not content.strip():
            self._add("ERROR", "File is empty", "file")
            return self.errors
        # Check YAML frontmatter
        self._validate_frontmatter(self._parse_frontmatter(content))
        # Check markdown body
        self._validate_body(self._extract_body(content))
        # Check directory structure
        self._validate_directory(path.parent)
        return self.errors

    def _parse_frontmatter(self, content: str) -> dict:
        """Extract the YAML frontmatter as a dict.

        Returns an empty dict (after recording an ERROR where appropriate)
        when the frontmatter is missing, is not valid YAML, or is not a
        mapping.
        """
        match = self._FRONTMATTER_RE.match(content)
        if not match:
            self._add(
                "ERROR",
                "No YAML frontmatter found (must start with ---)",
                "frontmatter",
            )
            return {}
        try:
            data = yaml.safe_load(match.group(1))
        except yaml.YAMLError as e:
            self._add("ERROR", f"Invalid YAML: {e}", "frontmatter")
            return {}
        if isinstance(data, dict):
            return data
        if data is not None:
            # e.g. a bare list or scalar: say so explicitly instead of only
            # emitting a confusing run of "missing field" errors.
            self._add(
                "ERROR",
                f"Frontmatter must be a YAML mapping, got {type(data).__name__}",
                "frontmatter",
            )
        return {}

    def _extract_body(self, content: str) -> str:
        """Return the markdown body after the frontmatter.

        When no frontmatter block is found the whole content is returned.
        """
        match = self._FRONTMATTER_RE.match(content)
        return content[match.end():] if match else content

    def _validate_frontmatter(self, fm: dict):
        """Validate frontmatter fields (presence, name, description, version)."""
        for field in self.REQUIRED_FRONTMATTER:
            if field not in fm:
                self._add("ERROR", f"Missing required field: {field}", "frontmatter")
            elif self._is_blank(fm[field]):
                self._add("ERROR", f"Empty required field: {field}", "frontmatter")
        for field in self.RECOMMENDED_FRONTMATTER:
            if field not in fm:
                self._add(
                    "WARNING", f"Missing recommended field: {field}", "frontmatter"
                )

        # Name validation.  Skipped for blank values: those are already
        # reported as "Empty required field" above.
        if not self._is_blank(fm.get("name")):
            name = str(fm["name"])
            # fullmatch: ``$`` with match() would tolerate a trailing newline.
            if not re.fullmatch(r'[a-z0-9][a-z0-9_-]*', name):
                self._add(
                    "ERROR",
                    f"Invalid name '{name}': use lowercase, hyphens, underscores",
                    "frontmatter",
                )
            if len(name) > 64:
                self._add(
                    "ERROR",
                    f"Name too long ({len(name)} chars, max 64)",
                    "frontmatter",
                )

        # Description length
        if not self._is_blank(fm.get("description")):
            desc = str(fm["description"])
            if len(desc) < 10:
                self._add("WARNING", "Description too short (< 10 chars)", "frontmatter")
            if len(desc) > 200:
                self._add("WARNING", "Description very long (> 200 chars)", "frontmatter")

        # Version format: MAJOR.MINOR with an optional .PATCH component.
        if not self._is_blank(fm.get("version")):
            ver = str(fm["version"])
            if not re.fullmatch(r'\d+\.\d+(\.\d+)?', ver):
                self._add("WARNING", f"Non-semver version: {ver}", "frontmatter")

    def _validate_body(self, body: str):
        """Validate markdown body structure (sections, steps, examples, size)."""
        # Structural checks run on the body with fenced code removed, so that
        # ``# comment`` lines inside code samples are not taken for headers
        # and do not cut a section short.
        prose = self._FENCED_CODE_RE.sub("", body)
        headers_lower = [h.lower().strip() for h in self._HEADER_RE.findall(prose)]

        section_rules = (
            ("ERROR", "required", self.REQUIRED_SECTIONS),
            ("WARNING", "recommended", self.RECOMMENDED_SECTIONS),
        )
        for level, kind, sections in section_rules:
            for section in sections:
                wanted = section.lower()
                if not any(wanted in h for h in headers_lower):
                    self._add(level, f"Missing {kind} section: {section}", "body")

        # Check for numbered steps.  Every section whose header mentions
        # step/procedure/instructions is considered; the warning is raised
        # only if none of them contains at least two numbered items.
        step_sections = [m.group(1) for m in self._STEPS_SECTION_RE.finditer(prose)]
        if step_sections and not any(
            len(re.findall(r'^\d+\.', text, re.MULTILINE)) >= 2
            for text in step_sections
        ):
            self._add(
                "WARNING", "Steps section has fewer than 2 numbered items", "body"
            )

        # Check for code blocks: need at least one opening/closing fence pair.
        if body.count("```") < 2:
            self._add(
                "INFO", "No code blocks found — consider adding examples", "body"
            )

        # Content length check
        word_count = len(body.split())
        if word_count < 50:
            self._add("WARNING", f"Very short body ({word_count} words)", "body")

    def _validate_directory(self, skill_dir: Path):
        """Warn about subdirectories outside the standard skill layout."""
        valid_subdirs = {"references", "templates", "scripts", "assets"}
        # Sorted so findings come out in the same order on every run.
        for child in sorted(skill_dir.iterdir()):
            if child.is_dir() and child.name not in valid_subdirs:
                self._add(
                    "WARNING", f"Non-standard subdirectory: {child.name}/", "directory"
                )

    def validate_all(self, skills_root: Optional[Path] = None) -> dict:
        """Validate every SKILL.md found recursively under a root directory.

        Args:
            skills_root: Directory to scan; ``DEFAULT_SKILLS_ROOT`` when
                omitted or falsy.

        Returns:
            A dict keyed by skill directory name.  Each value holds ``path``,
            the ``errors`` / ``warnings`` / ``info`` counts and ``findings``
            (the rendered findings).  If two skills share a directory name,
            later ones are keyed by their path relative to the root so no
            result is overwritten.
        """
        skills_root = Path(skills_root or self.DEFAULT_SKILLS_ROOT)
        results = {}
        for skill_md in sorted(skills_root.rglob("SKILL.md")):
            skill_name = skill_md.parent.name
            if skill_name in results:
                skill_name = skill_md.parent.relative_to(skills_root).as_posix()
            findings = self.validate_file(skill_md)
            levels = [finding.level for finding in findings]
            results[skill_name] = {
                "path": str(skill_md),
                "errors": levels.count("ERROR"),
                "warnings": levels.count("WARNING"),
                "info": levels.count("INFO"),
                "findings": [repr(finding) for finding in findings],
            }
        return results

    def format_report(self, results: dict) -> str:
        """Format validate_all() results as a markdown report.

        Skills are listed with the most errors first (ties broken by warning
        count); each header carries a pass/fail marker.
        """
        total_errors = sum(r["errors"] for r in results.values())
        total_warnings = sum(r["warnings"] for r in results.values())
        lines = [
            "# Skill Validation Report",
            f"**Skills scanned:** {len(results)}",
            "",
            f"**Total:** {total_errors} errors, {total_warnings} warnings",
            "",
        ]
        # Sort by error count descending
        sorted_results = sorted(
            results.items(),
            key=lambda item: (item[1]["errors"], item[1]["warnings"]),
            reverse=True,
        )
        for name, r in sorted_results:
            # Both branches used to be empty strings, making passing and
            # failing skills indistinguishable in the header.
            icon = "✅" if r["errors"] == 0 else "❌"
            lines.append(f"### {icon} {name}")
            if r["findings"]:
                lines.extend(f" {finding}" for finding in r["findings"])
            else:
                lines.append(" No issues found")
            lines.append("")
        return "\n".join(lines)
if __name__ == "__main__":
    # Script entry point: validate every skill under a root directory and
    # print the markdown report to stdout.
    import sys

    # An optional first command-line argument selects the skills root; with
    # no argument validate_all() falls back to its built-in default.
    cli_root = sys.argv[1] if len(sys.argv) > 1 else None
    v = SkillValidator()
    results = v.validate_all(cli_root)
    print(v.format_report(results))