#!/usr/bin/env python3
"""
Skill validation framework for Ezra.
Validates SKILL.md files for completeness, structure, and quality.

Epic: EZRA-SELF-001 / Phase 3 - Skill System Enhancement
Author: Ezra (self-improvement)
"""

import re
import sys
import yaml
from pathlib import Path
from typing import Optional


class SkillValidationError:
    """A single validation finding.

    Despite the name this is a plain record, not an exception: findings are
    collected in a list and reported, never raised.

    Attributes:
        level: Severity, one of "ERROR", "WARNING" or "INFO".
        message: Human-readable description of the finding.
        field: Area the finding belongs to (e.g. "frontmatter", "body");
            empty when not applicable.
    """

    def __init__(self, level: str, message: str, field: str = ""):
        self.level = level  # ERROR, WARNING, INFO
        self.message = message
        self.field = field

    def __repr__(self):
        prefix = {"ERROR": "❌", "WARNING": "⚠️", "INFO": "ℹ️"}.get(self.level, "?")
        field_str = f" [{self.field}]" if self.field else ""
        return f"{prefix} {self.level}{field_str}: {self.message}"


class SkillValidator:
    """Validate SKILL.md files for quality and completeness.

    A validator instance accumulates findings in ``self.errors``; the list is
    replaced at the start of every ``validate_file`` call.
    """

    REQUIRED_FRONTMATTER = ["name", "description", "version"]
    RECOMMENDED_FRONTMATTER = ["author", "tags"]

    REQUIRED_SECTIONS = ["trigger", "steps"]
    RECOMMENDED_SECTIONS = ["pitfalls", "verification"]

    # Root scanned by validate_all() when no directory is given.
    DEFAULT_SKILLS_ROOT = "/root/wizards/ezra/home/skills"

    # One pattern shared by _parse_frontmatter and _extract_body so both agree
    # on where the frontmatter ends. Group 1 is the YAML text, group 2 the
    # markdown body. The closing delimiter must be a line consisting only of
    # "---" (optionally followed by blanks), and may be the last line of the
    # file without a trailing newline.
    _FRONTMATTER_RE = re.compile(
        r'\A---[ \t]*\r?\n(.*?)(?<=\n)---[ \t]*\r?(?:\n|\Z)(.*)\Z',
        re.DOTALL,
    )
    _HEADER_RE = re.compile(r'^#+[ \t]+(.+?)\s*$')
    _STEPS_TITLE_RE = re.compile(r'step|procedure|instructions', re.IGNORECASE)
    _NUMBERED_ITEM_RE = re.compile(r'^\d+\.')
    _FENCE_MARKERS = ("```", "~~~")

    def __init__(self):
        self.errors: list[SkillValidationError] = []

    def validate_file(self, path: Path) -> list[SkillValidationError]:
        """Validate a single SKILL.md file.

        Args:
            path: Location of the SKILL.md file (anything ``Path`` accepts).

        Returns:
            The findings for this file; also available as ``self.errors``
            until the next call.
        """
        self.errors = []
        path = Path(path)

        if not path.exists():
            self.errors.append(SkillValidationError("ERROR", f"File not found: {path}", "file"))
            return self.errors

        try:
            # utf-8-sig reads plain UTF-8 and also drops a leading BOM, which
            # would otherwise hide the opening "---" from the frontmatter check.
            content = path.read_text(encoding="utf-8-sig")
        except (OSError, UnicodeDecodeError) as exc:
            # Directories, permission problems and non-UTF-8 files become a
            # finding instead of aborting a whole validate_all() run.
            self.errors.append(SkillValidationError("ERROR", f"Cannot read file: {exc}", "file"))
            return self.errors

        if not content.strip():
            self.errors.append(SkillValidationError("ERROR", "File is empty", "file"))
            return self.errors

        # Check YAML frontmatter
        frontmatter = self._parse_frontmatter(content)
        self._validate_frontmatter(frontmatter)

        # Check markdown body
        body = self._extract_body(content)
        self._validate_body(body)

        # Check directory structure
        self._validate_directory(path.parent)

        return self.errors

    def _parse_frontmatter(self, content: str) -> dict:
        """Extract YAML frontmatter.

        Returns the parsed mapping, or ``{}`` when the frontmatter is missing,
        empty, invalid YAML, or not a mapping (a finding is recorded for every
        case except the empty one).
        """
        match = self._FRONTMATTER_RE.match(content)
        if not match:
            self.errors.append(SkillValidationError(
                "ERROR", "No YAML frontmatter found (must start with ---)", "frontmatter"))
            return {}

        try:
            data = yaml.safe_load(match.group(1))
        except yaml.YAMLError as e:
            self.errors.append(SkillValidationError("ERROR", f"Invalid YAML: {e}", "frontmatter"))
            return {}

        if data is None:
            # Empty frontmatter; the required-field checks will report it.
            return {}
        if not isinstance(data, dict):
            self.errors.append(SkillValidationError(
                "ERROR", "Frontmatter must be a YAML mapping", "frontmatter"))
            return {}
        return data

    def _extract_body(self, content: str) -> str:
        """Extract markdown body after frontmatter.

        Returns *content* unchanged when no frontmatter block is present.
        """
        match = self._FRONTMATTER_RE.match(content)
        return match.group(2) if match else content

    @staticmethod
    def _is_blank(value) -> bool:
        """Return True for None, empty/whitespace-only strings and empty containers.

        Numbers and booleans are never blank, so ``version: 0`` is checked as a
        version string instead of being reported as an empty field.
        """
        if value is None:
            return True
        if isinstance(value, str):
            return not value.strip()
        if isinstance(value, (list, tuple, dict, set)):
            return not value
        return False

    def _validate_frontmatter(self, fm: dict):
        """Validate frontmatter fields.

        Records an ERROR for each missing or blank required field, a WARNING
        for each missing recommended field, then format checks on name,
        description and version. Format checks are skipped for blank values so
        a single problem is not reported twice.
        """
        for field in self.REQUIRED_FRONTMATTER:
            if field not in fm:
                self.errors.append(SkillValidationError(
                    "ERROR", f"Missing required field: {field}", "frontmatter"))
            elif self._is_blank(fm[field]):
                self.errors.append(SkillValidationError(
                    "ERROR", f"Empty required field: {field}", "frontmatter"))

        for field in self.RECOMMENDED_FRONTMATTER:
            if field not in fm:
                self.errors.append(SkillValidationError(
                    "WARNING", f"Missing recommended field: {field}", "frontmatter"))

        # Name validation
        if not self._is_blank(fm.get("name")):
            name = str(fm["name"])
            if not re.match(r'^[a-z0-9][a-z0-9_-]*$', name):
                self.errors.append(SkillValidationError(
                    "ERROR", f"Invalid name '{name}': use lowercase, hyphens, underscores",
                    "frontmatter"))
            if len(name) > 64:
                self.errors.append(SkillValidationError(
                    "ERROR", f"Name too long ({len(name)} chars, max 64)", "frontmatter"))

        # Description length
        if not self._is_blank(fm.get("description")):
            desc = str(fm["description"])
            if len(desc) < 10:
                self.errors.append(SkillValidationError(
                    "WARNING", "Description too short (< 10 chars)", "frontmatter"))
            if len(desc) > 200:
                self.errors.append(SkillValidationError(
                    "WARNING", "Description very long (> 200 chars)", "frontmatter"))

        # Version format
        if not self._is_blank(fm.get("version")):
            ver = str(fm["version"])
            if not re.match(r'^\d+\.\d+(\.\d+)?$', ver):
                self.errors.append(SkillValidationError(
                    "WARNING", f"Non-semver version: {ver}", "frontmatter"))

    @classmethod
    def _split_sections(cls, body: str) -> list:
        """Split a markdown body into ``(header_title, content_lines)`` pairs.

        Lines starting with ``#`` inside fenced code blocks (shell comments,
        for example) are not treated as headers; they stay part of the
        enclosing section. Text before the first header is dropped.
        """
        sections = []
        current = None   # content list of the section being filled
        fence = None     # marker of the currently open fence, if any
        for line in body.splitlines():
            marker = line.lstrip()[:3]
            if marker in cls._FENCE_MARKERS:
                # Only the marker that opened a fence may close it.
                if fence is None:
                    fence = marker
                elif fence == marker:
                    fence = None
            elif fence is None:
                header = cls._HEADER_RE.match(line)
                if header:
                    current = []
                    sections.append((header.group(1), current))
                    continue
            if current is not None:
                current.append(line)
        return sections

    def _validate_body(self, body: str):
        """Validate markdown body structure.

        Checks required/recommended section headers, that the steps section
        contains at least two numbered items, that at least one fenced code
        block exists, and that the body is not trivially short.
        """
        sections = self._split_sections(body)
        headers_lower = [title.lower().strip() for title, _ in sections]

        for section in self.REQUIRED_SECTIONS:
            if not any(section.lower() in h for h in headers_lower):
                self.errors.append(SkillValidationError(
                    "ERROR", f"Missing required section: {section}", "body"))

        for section in self.RECOMMENDED_SECTIONS:
            if not any(section.lower() in h for h in headers_lower):
                self.errors.append(SkillValidationError(
                    "WARNING", f"Missing recommended section: {section}", "body"))

        # Check for numbered steps: only the first section whose *header*
        # mentions steps/procedure/instructions is inspected.
        steps_lines = next(
            (lines for title, lines in sections if self._STEPS_TITLE_RE.search(title)),
            None,
        )
        if steps_lines is not None:
            numbered = sum(1 for ln in steps_lines if self._NUMBERED_ITEM_RE.match(ln))
            if numbered < 2:
                self.errors.append(SkillValidationError(
                    "WARNING", "Steps section has fewer than 2 numbered items", "body"))

        # Check for code blocks
        if body.count("```") < 2:  # Need at least one pair
            self.errors.append(SkillValidationError(
                "INFO", "No code blocks found — consider adding examples", "body"))

        # Content length check
        word_count = len(body.split())
        if word_count < 50:
            self.errors.append(SkillValidationError(
                "WARNING", f"Very short body ({word_count} words)", "body"))

    def _validate_directory(self, skill_dir: Path):
        """Validate skill directory structure.

        Records a WARNING for every direct subdirectory whose name is not one
        of the standard ones.
        """
        valid_subdirs = {"references", "templates", "scripts", "assets"}
        # iterdir() order is arbitrary; sort so reports are reproducible.
        for child in sorted(skill_dir.iterdir()):
            if child.is_dir() and child.name not in valid_subdirs:
                self.errors.append(SkillValidationError(
                    "WARNING", f"Non-standard subdirectory: {child.name}/", "directory"))

    def validate_all(self, skills_root: Optional[Path] = None) -> dict:
        """Validate all skills under a root directory.

        Args:
            skills_root: Directory searched recursively for SKILL.md files;
                defaults to ``DEFAULT_SKILLS_ROOT``.

        Returns:
            Mapping of skill key to a summary dict with "path", "errors",
            "warnings", "info" (counts) and "findings" (formatted strings).
            The key is the skill's directory name; when two skills share a
            directory name, later ones are keyed by their path relative to
            the root so that no result is overwritten.
        """
        skills_root = Path(skills_root or self.DEFAULT_SKILLS_ROOT)
        results = {}

        for skill_md in sorted(skills_root.rglob("SKILL.md")):
            skill_name = skill_md.parent.name
            if skill_name in results:
                skill_name = skill_md.parent.relative_to(skills_root).as_posix()
            if skill_name in results:
                # Last resort (e.g. root named like one of its children).
                skill_name = str(skill_md)

            errors = self.validate_file(skill_md)
            results[skill_name] = {
                "path": str(skill_md),
                "errors": len([e for e in errors if e.level == "ERROR"]),
                "warnings": len([e for e in errors if e.level == "WARNING"]),
                "info": len([e for e in errors if e.level == "INFO"]),
                "findings": [repr(e) for e in errors],
            }

        return results

    def format_report(self, results: dict) -> str:
        """Format validation results as a report.

        Args:
            results: Mapping as returned by ``validate_all``.

        Returns:
            A markdown report; skills are listed with the most errors (then
            most warnings) first.
        """
        lines = [
            "# Skill Validation Report",
            f"**Skills scanned:** {len(results)}",
            "",
        ]

        total_errors = sum(r["errors"] for r in results.values())
        total_warnings = sum(r["warnings"] for r in results.values())
        lines.append(f"**Total:** {total_errors} errors, {total_warnings} warnings")
        lines.append("")

        # Sort by error count descending
        sorted_results = sorted(
            results.items(),
            key=lambda x: (x[1]["errors"], x[1]["warnings"]),
            reverse=True,
        )

        for name, r in sorted_results:
            icon = "✅" if r["errors"] == 0 else "❌"
            lines.append(f"### {icon} {name}")
            if r["findings"]:
                for finding in r["findings"]:
                    lines.append(f" {finding}")
            else:
                lines.append(" No issues found")
            lines.append("")

        return "\n".join(lines)


if __name__ == "__main__":
    v = SkillValidator()
    # Optional first argument overrides the default skills root.
    results = v.validate_all(sys.argv[1] if len(sys.argv) > 1 else None)
    print(v.format_report(results))