Files
ezra-environment/tools/skill_validator.py

209 lines
8.5 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Skill validation framework for Ezra.
Validates SKILL.md files for completeness, structure, and quality.
Epic: EZRA-SELF-001 / Phase 3 - Skill System Enhancement
Author: Ezra (self-improvement)
"""
import re
import yaml
from pathlib import Path
from typing import Optional
class SkillValidationError:
    """A single validation finding.

    Despite the name this is a plain record, not an exception: instances
    are collected in a list and rendered with ``repr()``.

    Attributes are set directly from the constructor arguments:
        level:   severity label, one of "ERROR", "WARNING" or "INFO".
        message: human-readable description of the finding.
        field:   optional area the finding belongs to; empty string when
                 not given.
    """

    # Glyph shown in front of each known severity level. The ERROR and INFO
    # glyphs were empty strings, which made repr() start with a stray space.
    _PREFIXES = {"ERROR": "❌", "WARNING": "⚠️", "INFO": "ℹ️"}

    def __init__(self, level: str, message: str, field: str = ""):
        self.level = level  # ERROR, WARNING, INFO
        self.message = message
        self.field = field

    def __repr__(self):
        """Return ``"<glyph> <LEVEL>[ [field]]: <message>"``.

        Levels missing from the prefix table are rendered with ``?``.
        """
        prefix = self._PREFIXES.get(self.level, "?")
        field_str = f" [{self.field}]" if self.field else ""
        return f"{prefix} {self.level}{field_str}: {self.message}"
class SkillValidator:
    """Validate SKILL.md files for quality and completeness.

    Findings for the file currently being validated are accumulated in
    ``self.errors``; every call to :meth:`validate_file` resets that list.
    """

    REQUIRED_FRONTMATTER = ["name", "description", "version"]
    RECOMMENDED_FRONTMATTER = ["author", "tags"]
    REQUIRED_SECTIONS = ["trigger", "steps"]
    RECOMMENDED_SECTIONS = ["pitfalls", "verification"]

    # Root scanned by validate_all() when the caller does not supply one.
    DEFAULT_SKILLS_ROOT = "/root/wizards/ezra/home/skills"
    # Subdirectories a skill folder may contain without triggering a warning.
    VALID_SUBDIRS = frozenset({"references", "templates", "scripts", "assets"})

    # A single pattern serves both frontmatter parsing and body extraction so
    # the two can never disagree about where the frontmatter ends. The closing
    # fence may be followed by a newline OR by end-of-file.
    # group(1) = YAML text, group(2) = markdown body.
    _FRONTMATTER_RE = re.compile(
        r'^---\s*\n(.*?)\n---\s*(?:\n|\Z)(.*)', re.DOTALL
    )
    # Markdown ATX heading; [ \t]+ (not \s+) keeps the match on one line.
    _HEADER_RE = re.compile(r'^#+[ \t]+(.+)$', re.MULTILINE)
    # Heading line mentioning step/procedure/instructions, followed by the
    # section text up to the next heading. [^\n]* keeps the keyword search on
    # the heading line itself even though DOTALL is on for the section text.
    _STEPS_RE = re.compile(
        r'^#+[ \t]+[^\n]*(?:step|procedure|instructions)[^\n]*\n'
        r'(.*?)(?=^#+\s|\Z)',
        re.IGNORECASE | re.DOTALL | re.MULTILINE,
    )
    _NAME_RE = re.compile(r'[a-z0-9][a-z0-9_-]*')
    _VERSION_RE = re.compile(r'\d+\.\d+(\.\d+)?')
    _NUMBERED_ITEM_RE = re.compile(r'^\d+\.', re.MULTILINE)

    def __init__(self):
        self.errors = []

    # ------------------------------------------------------------------ #
    # Small helpers
    # ------------------------------------------------------------------ #
    def _add(self, level: str, message: str, field: str = ""):
        """Record one finding on ``self.errors``."""
        self.errors.append(SkillValidationError(level, message, field))

    @staticmethod
    def _is_blank(value) -> bool:
        """Return True for None, blank strings and empty containers.

        Plain truthiness would also treat ``0`` / ``False`` as empty, which
        misreports e.g. ``version: 0`` as a missing value.
        """
        if value is None:
            return True
        if isinstance(value, str):
            return not value.strip()
        if isinstance(value, (list, tuple, dict, set)):
            return not value
        return False

    @staticmethod
    def _strip_fenced_code(text: str) -> str:
        """Return *text* without fenced code blocks (``` or ~~~ fences).

        Used before heading detection so that ``# comment`` lines inside
        code samples are not mistaken for markdown headings. An unclosed
        fence swallows everything up to the end of the text.
        """
        kept = []
        fence = None
        for line in text.split("\n"):
            marker = line.lstrip()[:3]
            if fence is None:
                if marker in ("```", "~~~"):
                    fence = marker
                else:
                    kept.append(line)
            elif marker == fence:
                fence = None
        return "\n".join(kept)

    @classmethod
    def _find_steps_section(cls, text: str) -> Optional[str]:
        """Return the text under the first steps-like heading, or None.

        A steps-like heading is one whose own line contains "step",
        "procedure" or "instructions" (case-insensitive).
        """
        match = cls._STEPS_RE.search(text)
        return match.group(1) if match else None

    # ------------------------------------------------------------------ #
    # Public API
    # ------------------------------------------------------------------ #
    def validate_file(self, path: Path) -> "list[SkillValidationError]":
        """Validate a single SKILL.md file.

        Args:
            path: location of the file; anything ``Path()`` accepts.

        Returns:
            The list of findings (also available as ``self.errors``). A
            missing, unreadable or empty file yields a single ERROR and no
            further checks are run.
        """
        self.errors = []
        path = Path(path)
        if not path.exists():
            self._add("ERROR", f"File not found: {path}", "file")
            return self.errors
        try:
            # utf-8-sig: explicit encoding, and a leading BOM does not hide
            # the opening '---' fence.
            content = path.read_text(encoding="utf-8-sig")
        except (OSError, UnicodeDecodeError) as exc:
            # Report instead of raising so one bad file cannot abort a
            # whole validate_all() scan.
            self._add("ERROR", f"Cannot read file: {exc}", "file")
            return self.errors
        if not content.strip():
            self._add("ERROR", "File is empty", "file")
            return self.errors

        # Check YAML frontmatter
        self._validate_frontmatter(self._parse_frontmatter(content))
        # Check markdown body
        self._validate_body(self._extract_body(content))
        # Check directory structure
        self._validate_directory(path.parent)
        return self.errors

    def _parse_frontmatter(self, content: str) -> dict:
        """Extract the YAML frontmatter as a dict.

        Records an ERROR and returns ``{}`` when the frontmatter is missing,
        is not valid YAML, or parses to something other than a mapping.
        """
        match = self._FRONTMATTER_RE.match(content)
        if not match:
            self._add("ERROR", "No YAML frontmatter found (must start with ---)", "frontmatter")
            return {}
        try:
            data = yaml.safe_load(match.group(1))
        except yaml.YAMLError as e:
            self._add("ERROR", f"Invalid YAML: {e}", "frontmatter")
            return {}
        if isinstance(data, dict):
            return data
        if data is not None:
            # A list or scalar used to be dropped silently.
            self._add("ERROR", "Frontmatter must be a YAML mapping", "frontmatter")
        return {}

    def _extract_body(self, content: str) -> str:
        """Return the markdown after the frontmatter.

        When no frontmatter is found the whole content is returned.
        """
        match = self._FRONTMATTER_RE.match(content)
        return match.group(2) if match else content

    def _validate_frontmatter(self, fm: dict):
        """Validate frontmatter fields and record findings."""
        for field in self.REQUIRED_FRONTMATTER:
            if field not in fm:
                self._add("ERROR", f"Missing required field: {field}", "frontmatter")
            elif self._is_blank(fm[field]):
                self._add("ERROR", f"Empty required field: {field}", "frontmatter")
        for field in self.RECOMMENDED_FRONTMATTER:
            if field not in fm:
                self._add("WARNING", f"Missing recommended field: {field}", "frontmatter")

        # Name validation. Skipped for blank values: those are already
        # reported above and would otherwise add "Invalid name 'None'".
        if not self._is_blank(fm.get("name")):
            name = str(fm["name"])
            # fullmatch: '$' would accept a trailing newline.
            if not self._NAME_RE.fullmatch(name):
                self._add("ERROR", f"Invalid name '{name}': use lowercase, hyphens, underscores", "frontmatter")
            if len(name) > 64:
                self._add("ERROR", f"Name too long ({len(name)} chars, max 64)", "frontmatter")

        # Description length
        if not self._is_blank(fm.get("description")):
            desc = str(fm["description"])
            if len(desc) < 10:
                self._add("WARNING", "Description too short (< 10 chars)", "frontmatter")
            if len(desc) > 200:
                self._add("WARNING", "Description very long (> 200 chars)", "frontmatter")

        # Version format (MAJOR.MINOR or MAJOR.MINOR.PATCH)
        if not self._is_blank(fm.get("version")):
            ver = str(fm["version"])
            if not self._VERSION_RE.fullmatch(ver):
                self._add("WARNING", f"Non-semver version: {ver}", "frontmatter")

    def _validate_body(self, body: str):
        """Validate markdown body structure and record findings."""
        # Headings and the steps section are looked up with code fences
        # removed, so '# comment' lines in examples neither count as
        # headings nor cut the steps section short.
        prose = self._strip_fenced_code(body)
        headers_lower = [h.lower().strip() for h in self._HEADER_RE.findall(prose)]

        checks = (
            ("ERROR", "required", self.REQUIRED_SECTIONS),
            ("WARNING", "recommended", self.RECOMMENDED_SECTIONS),
        )
        for level, label, sections in checks:
            for section in sections:
                if not any(section.lower() in h for h in headers_lower):
                    self._add(level, f"Missing {label} section: {section}", "body")

        # Check for numbered steps
        steps_content = self._find_steps_section(prose)
        if steps_content is not None:
            if len(self._NUMBERED_ITEM_RE.findall(steps_content)) < 2:
                self._add("WARNING", "Steps section has fewer than 2 numbered items", "body")

        # Check for code blocks: need at least one opening/closing pair
        if body.count("```") < 2:
            self._add("INFO", "No code blocks found — consider adding examples", "body")

        # Content length check
        word_count = len(body.split())
        if word_count < 50:
            self._add("WARNING", f"Very short body ({word_count} words)", "body")

    def _validate_directory(self, skill_dir: Path):
        """Warn about subdirectories outside the standard skill layout."""
        # Sorted so findings come out in the same order on every run.
        for child in sorted(skill_dir.iterdir()):
            if child.is_dir() and child.name not in self.VALID_SUBDIRS:
                self._add("WARNING", f"Non-standard subdirectory: {child.name}/", "directory")

    def validate_all(self, skills_root: Optional[Path] = None) -> dict:
        """Validate every SKILL.md found recursively under a root directory.

        Args:
            skills_root: directory to scan; defaults to DEFAULT_SKILLS_ROOT.

        Returns:
            Dict mapping a skill key to a summary dict with the keys
            ``path``, ``errors``, ``warnings``, ``info`` and ``findings``.
            The key is the skill's directory name; if that name is already
            taken by another skill, the directory path relative to the root
            (then the full file path) is used so no result is overwritten.
        """
        skills_root = Path(skills_root or self.DEFAULT_SKILLS_ROOT)
        results = {}
        for skill_md in sorted(skills_root.rglob("SKILL.md")):
            skill_dir = skill_md.parent
            candidates = (
                skill_dir.name,
                skill_dir.relative_to(skills_root).as_posix(),
                str(skill_md),
            )
            skill_name = next(
                (key for key in candidates if key not in results), candidates[0]
            )
            errors = self.validate_file(skill_md)
            results[skill_name] = {
                "path": str(skill_md),
                "errors": sum(1 for e in errors if e.level == "ERROR"),
                "warnings": sum(1 for e in errors if e.level == "WARNING"),
                "info": sum(1 for e in errors if e.level == "INFO"),
                "findings": [repr(e) for e in errors],
            }
        return results

    def format_report(self, results: dict) -> str:
        """Format validate_all() results as a markdown report.

        Skills are listed with the most errors (then most warnings) first.
        """
        lines = [
            "# Skill Validation Report",
            f"**Skills scanned:** {len(results)}",
            "",
        ]
        total_errors = sum(r["errors"] for r in results.values())
        total_warnings = sum(r["warnings"] for r in results.values())
        lines.append(f"**Total:** {total_errors} errors, {total_warnings} warnings")
        lines.append("")

        # Sort by error count descending
        sorted_results = sorted(
            results.items(),
            key=lambda item: (item[1]["errors"], item[1]["warnings"]),
            reverse=True,
        )
        for name, r in sorted_results:
            # Both branches used to be the same empty string.
            icon = "✅" if r["errors"] == 0 else "❌"
            lines.append(f"### {icon} {name}")
            if r["findings"]:
                lines.extend(f" {finding}" for finding in r["findings"])
            else:
                lines.append(" No issues found")
            lines.append("")
        return "\n".join(lines)
if __name__ == "__main__":
    # Script entry point: validate every skill under a root directory and
    # print the report.
    import sys

    # An optional first command-line argument overrides the skills root;
    # with no argument validate_all() falls back to its built-in default,
    # so the previous invocation keeps working unchanged.
    root = Path(sys.argv[1]) if len(sys.argv) > 1 else None
    validator = SkillValidator()
    print(validator.format_report(validator.validate_all(root)))